perm filename MANUAL.TEX[TEX,DEK]45 blob
sn#707854 filedate 1983-04-17 generic text, type C, neo UTF8
COMMENT ⊗ VALID 00047 PAGES
C REC PAGE DESCRIPTION
C00001 00001
C00006 00002 % This manual is copyright (C) 1983 by the American Mathematical Society.
C00007 00003 % temporary cover page
C00009 00004 % title
C00011 00005 % copyright
C00014 00006 % dedication
C00015 00007 % blank page
C00016 00008 % the preface
C00029 00009 % the table of contents
C00032 00010 \beginchapter Chapter 1. The Name of\\the Game
C00037 00011 \beginchapter Chapter 2. Book Printing\\versus\\Ordinary Typing
C00052 00012 \beginchapter Chapter 3. Controlling\\\TeX
C00077 00013 \beginchapter Chapter 4. Fonts\\of Type
C00099 00014 \beginchapter Chapter 5. Grouping
C00116 00015 \beginchapter Chapter 6. Running\\\TeX
C00175 00016 \beginchapter Chapter 7. How \TeX\ Reads\\What You Type
C00204 00017 \beginchapter Chapter 8. The Characters\\You Type
C00245 00018 \beginchapter Chapter 9. \TeX's Roman Fonts
C00266 00019 \beginchapter Chapter 10. Dimensions
C00292 00020 \beginchapter Chapter 11. Boxes
C00321 00021 \beginchapter Chapter 12. Glue
C00401 00022 \beginchapter Chapter 13. Modes
C00428 00023 \beginchapter Chapter 14. How \TeX\ Breaks\\Paragraphs Into Lines
C00530 00024 \beginchapter Chapter 15. How \TeX\ Makes\\Lines Into Pages
C00629 00025 \beginchapter Chapter 16. Typing\\Math Formulas
C00686 00026 \beginchapter Chapter 17. More about Math
C00792 00027 \beginchapter Chapter 18. Fine Points of\\Mathematics Typing
C00912 00028 \beginchapter Chapter 19. Displayed Equations
C00978 00029 \beginchapter Chapter 20. Definitions\\(also called Macros)
C01100 00030 \beginchapter Chapter 21. Making Boxes
C01147 00031 \beginchapter Chapter 22. Alignment
C01258 00032 \beginchapter Chapter 23. Output Routines
C01260 00033 \beginchapter Chapter 24. Summary of\\Vertical Mode
C01262 00034 \beginchapter Chapter 25. Summary of\\Horizontal Mode
C01263 00035 \beginchapter Chapter 26. Summary of\\Math Mode
C01265 00036 \beginchapter Chapter 27. Recovery from\\Errors
C01266 00037 \beginchapter Appendix A. Answers to\\All the\\Exercises
C01268 00038 \beginchapter Appendix B. Basic\\Control\\Sequences
C01270 00039 \beginchapter Appendix C. Character\\Codes
C01272 00040 \beginchapter Appendix D. Dirty Tricks
C01274 00041 \beginchapter Appendix E. Example Formats
C01276 00042 \beginchapter Appendix F. Font Tables
C01278 00043 \beginchapter Appendix G. Generating Boxes\\from Formulas
C01324 00044 \beginchapter Appendix H. Hyphenation
C01327 00045 \beginchapter Appendix I. Index
C01330 00046 \beginchapter Appendix J. Joining the\\\TeX\ Community
C01332 00047 \end
C01333 ENDMK
C⊗;
% This manual is copyright (C) 1983 by the American Mathematical Society.
% All rights are reserved!
% The file is distributed only for people to see its examples of TeX input,
% not for use in the preparation of books like The TeXbook.
% Permission for any other use of this file must be obtained in writing
% from the copyright holder and also from the publisher (Addison-Wesley).
\loop\iftrue
\errmessage{This manual is copyrighted and should not be TeXed}\repeat
\input manhdr
% temporary cover page
% Font change note: I want to make the big E on the cover page a bit narrower
\titlepage
\setcount0=-1983
\line{\manual !PQ\hfill
!\kern-10pt\lower13pt\hbox{'}\kern-5pt "BRRS} % that's The TeXbook
\vskip 1in
\rightline{April 3, 1983}
\vskip 6pt
\rightline{(Preliminary edition for people who can't wait.)}
\vfill
\rightline{Please be nice to the author until he is}
\rightline{finished writing the remaining chapters.}
\vfill
\rightline{The fine print in the upper right-hand}
\rightline{corner of each page is a draft of intended}
\rightline{index entries; it won't appear in the real book.}
\rightline{Some index entries will be in |typewriter type|}
\rightline{and/or preceded by {\tt\char`\\} or enclosed in \<$\ldots$>, etc.;}
\rightline{such typographic distinctions aren't shown here.}
\rightline{An index entry often extends for several pages;}
\rightline{the actual scope will be determined later.}
\rightline{Please note things that should be indexed but aren't.}
%\vfill
\eject
% title
\setcount0=-1 % the front matter is numbered with roman numerals
\font\auth=ambig at 21pt % used only on the title page
\font\elevenbf=ambx10 at 11pt % ditto
\font\elevenit=cmti10 at 11pt % ditto
\font\elevenrm=amr10 at 11pt % ditto
\titlepage
\line{\manual !PQ\hfill
!\kern-10pt\lower13pt\hbox{'}\kern-5pt "BRRS} % that's The TeXbook
↑(Knuth, Donald Ervin)
↑(Bibby, Duane Robert)
\vskip 1pc
\baselineskip 13pt \elevenbf
\halign to\the\hsize{#\hfil\tabskip 0pt plus 1fil&#\hfil\tabskip0pt\cr
\kern5.5mm\auth DONALD \kern-1pt E. \kern-1pt KNUTH&
\elevenit Stanford University\cr
\noalign{\vskip 12pc}
&\elevenit Illustrations by\cr
&DUANE BIBBY\cr
\noalign{\vfill}
&ADDISON-\kern-1pt WESLEY\cr
&PUBLISHING COMP\kern-.13emANY\kern-1.5mm\cr
\noalign{\vskip.5pc \global\elevenrm}
&Reading, Massachusetts\cr
&Menlo Park, California\cr
&London\enspace$\cdot$\enspace Amsterdam\cr
&Don Mills, Ontario\enspace$\cdot$\enspace Sydney\cr}
\eject
% copyright
\titlepage
\eightpoint
\vbox to 8pc{}
\noindent\strut
The quotation on page \sesame\ is copyright $\copyright$ 1970 by Sesame
Street, Inc., and used by permission of the Children's Television Workshop.
\medskip
\noindent
\TeX\ is a trademark of the American Mathematical Society.
\bigskip\medskip
\noindent
{\bf Library of Congress cataloging in publication data}
\medskip
{\tt\halign{#\hfil\cr
Knuth, Donald Ervin (1938-\cr
\ \ \ The TeXbook.\cr
\noalign{\medskip}
\ \ \ Includes index.\cr
\ \ \ 1.@TeX (Computer system).\ \ 2.@Computerized\cr
typesetting.\ \ 3.@Mathematics printing.\ \ I.@Title.\cr
Z253.4.T47K58\ \ 1983\ \ \ \ \ \ \ \ \ 686.2\char127 2544\ \ \ \ \ \ 83-830\cr
ISBN 0-201-13448-9\cr}}
\vfill
\noindent
Copyright $\copyright$ 1983 by the American Mathematical Society
\smallskip
\noindent
This book is published jointly by the American Mathematical Society
and Addison-\kern-1ptWesley Publishing Company.
All rights reserved. No part of this publication may be reproduced, stored in
a retrieval system, or transmitted, in any form or by any means,
electronic, mechanical, photocopying, recording, or otherwise, without
the prior written permission of the publishers. Printed in the United
States of America. Published simultaneously in Canada.
\medskip
\noindent
ISBN 0-201-13448-9\par
\noindent
ABCDEFGHIJ--HA--89876543
↑(Knuth, Donald Ervin)
↑(:copyright)
\eject
% dedication
\titlepage
\vbox to 8pc{}
\rightline{\strut\eightsss To Jill:}
↑(Knuth, Jill Carter)
\vskip2pt
\rightline{\eightsss For your books and brochures}
\vfill
\eject
% blank page
\titlepage
\null\vfill
\eject
% the preface
\titlepage
\def\rhead{Preface}
\vbox to 8pc{
\rightline{\titlefont Preface}\vss}
{\topskip 9pc % this makes equal sinkage throughout the Preface
\vskip\minusthe\parskip
\tenpoint
\noindent\hang\hangafter-2
\hbox to 0pt{\hskip\minusthe\hangindent\manual\char'176\hfill}\hskip-16pt
{\sc ENTLE} R{\sc EADER}: \strut This is a handbook about
\TeX, a new typesetting system intended for the creation
of beautiful books---and especially for books that contain a lot of
mathematics. By preparing a manuscript in \TeX\ format, you will be
telling a computer exactly how the manuscript is to be transformed into
pages whose typographic quality is comparable to that of the world's
finest printers; yet you won't need to do much more work than would be
involved if you were simply typing the manuscript on an ordinary
typewriter. In fact, your total work will probably be significantly less,
if you consider the time it ordinarily takes to revise a typewritten manuscript,
since computer text files are so easy to change and to reprocess. \
(If such claims sound too good to be true, keep in mind that they were made
by \TeX's designer, on a day when \TeX\ happened to
be working, so the statements may be biased; but read on anyway.)
This manual is intended for people who have never used \TeX\ before, as
well as for experienced \TeX\ hackers. In other words, it's supposed to
be a panacea that satisfies everybody, at the risk of satisfying nobody.
Everything you need to know about \TeX\ is explained
here somewhere, and so are a lot of things that most users don't care about.
If you are preparing a simple manuscript, you won't need to
learn much about \TeX\ at all; on the other hand, some
things that go into the printing of technical books are inherently
difficult, and if you wish to achieve more complex effects you
will want to penetrate some of \TeX's darker corners. In order
to make it possible for many types of users to read this manual
effectively, a special sign is used to designate material that is
for wizards only: When the symbol
$$\vbox{\hbox{\manual\char'177}\vskip 11pt}$$
appears at the beginning of a paragraph, it warns of a ``↑{dangerous bend}''
in the train of thought; don't read the paragraph unless you need to.
Brave and experienced drivers at the controls of \TeX\ will gradually enter
more and more of these hazardous areas, but for most applications the
details won't matter.
All that you really ought to know, before reading on, is how to get a
file of text into your computer using a standard editing program. This
manual explains what that file ought to look like so that \TeX\ will
understand it, but basic computer usage is not explained here.
Some previous experience with technical typing will be quite helpful
if you plan to do heavily mathematical work with \TeX, although it
is not absolutely necessary. \TeX\ will do most of the necessary
formatting of equations automatically; but users with more experience
will be able to obtain better results, since there are so many ways
to deal with formulas.
Some of the paragraphs in this manual are so esoteric that they are rated
$$\vcenter{\hbox{\manual\char'177\kern1pt\char'177}\vskip 11pt}\;;$$
everything that was said about single dangerous-bend signs goes double
for these. You should probably have at least a month's experience with
\TeX\ before you attempt to fathom such doubly dangerous depths
of the system; in fact, most people will never need to know \TeX\
in this much detail, even if they use it every day. After all, it's
possible to drive a car without knowing how the engine works.
Yet the whole story is here in case you're curious. \ (About \TeX, not cars.)
The reason for such different levels of complexity is that people change
as they grow accustomed to any powerful tool. When you first try to use \TeX,
you'll find that some parts of it are very easy, while other things will take
some getting used to. A day or so later, after you have successfully typeset a
few pages, you'll be a different person; the concepts that used to bother you
will now seem natural, and you'll be able to picture the final result in
your mind before it comes out of the machine. But you'll probably run into
challenges of a different kind. After another week your perspective will
change again, and you'll grow in yet another way; and so on. As years go by,
you might become involved with many different kinds of typesetting; and
you'll find that your usage of \TeX\ will keep changing as your experience
builds. That's the way it is with any powerful tool: there's always more
to learn, and there are always better ways to do what you've done before.
At every stage in the development you'll want a slightly different sort of
manual. You may even want to write one yourself. By paying attention to
the dangerous bend signs in this book you'll be better able to focus on
the level that interests you at a particular time.
Computer system manuals usually make dull reading, but take heart:
This one contains {\sc ↑{JOKES}} every once in a while, so you might actually
enjoy reading it. \ (However, most of the jokes can only be appreciated
properly if you understand a technical point that is being made---so
read {\sl carefully}.)
Another noteworthy characteristic of this manual is that it doesn't
always tell the truth. When certain concepts of \TeX\ are introduced
informally, general rules will be stated; afterwards you will find that the
rules aren't strictly true. In general, the later chapters contain more
reliable information than the earlier ones do. The author feels that this
technique of deliberate lying will actually make it easier for you to
learn the ideas. Once you understand a simple but false rule, it will not
be hard to supplement that rule with its exceptions.
In order to help you internalize what you're reading,
{\sc ↑{EXERCISES}} are sprinkled through this manual. It is generally intended
that every reader should try every exercise, except for questions that appear
in the ``dangerous bend'' areas. If you can't solve a problem, you
can always look up the answer.
But please, try first to solve it by yourself; then you'll learn more
and you'll learn faster. Furthermore, if you think you do know the solution,
you should turn to Appendix@A and check it out, just to make sure.
The \TeX\ language described in this book is similar to the author's first
attempt at a document formatting language, but the new system differs
from the old@one in literally thousands of details. Both languages have
been called \TeX; but henceforth the old language should be called
\TeX78, and its use should rapidly fade away. Let's keep the name \TeX\
for the language described here, since it is so much better, and since
it is not going to change any more. ↑(TeX78)
I wish to thank the hundreds of people who have helped me to formulate
this ``definitive edition'' of the \TeX\ language, based on their
experiences with preliminary versions of the system. My work at Stanford
has been generously supported by the ↑{National Science Foundation}, the
↑{Office of Naval Research}, the ↑{IBM Corporation}, and the ↑{System
Development Foundation}. I also wish to thank the ↑{American Mathematical
Society} for its encouragement, for establishing the \TeX\ Users Group,
and for publishing the {\sl ↑{TUGboat}\/} newsletter (see Appendix@J).
\medskip
\line{{\sl Stanford, California}\hfil--- D. E. K.}↑(Knuth, Don)
\line{\sl June 1983\hfil}
} % end of the special \topskip
\endchapter
'Tis pleasant, sure, to see one's name in print;
A book's a book, although there's nothing in 't.
\author ↑{BYRON}, {\sl English Bards and Scotch Reviewers\/} (1809)
\bigskip
A question arose as to whether we were covering the field
that it was intended we should fill with this manual.
\author RICHARD R. ↑{DONNELLEY}, {\sl Proceedings, United %
Typothet{\oldeightsss\ae} of America\/} (1897)
\eject
% the table of contents
\titlepage
\vbox to 8pc{
\rightline{\titlefont Contents}
\vfill}
↑(Contents of this manual, table)
\def\rhead{Table of Contents}
\tenpoint
\begingroup
\def\diamondleaders{\global\advcount255 by 1
\ifodd\count255 \kern-10pt \fi
\leaders\hbox to 20pt{\ifodd\count255 \kern13pt \else\kern3pt \fi
.\hss}}
\baselineskip 15pt plus 5pt
\def\\#1. #2. #3.{\line{\strut
\hbox to\the\parindent{\bf\hbox to 1em{\hss#1}\hss}%
\rm#2\diamondleaders\hfil\hbox to 2em{\hss#3}}}
\\1. The Name of the Game. 1.
\\2. Book Printing versus Ordinary Typing. 3.
\\3. Controlling \TeX. 7.
\\4. Fonts of Type. 13.
\\5. Grouping. 19.
\\6. Running \TeX. 23.
\\7. How \TeX\ Reads What You Type. 37.
\\8. The Characters You Type. 43.
\\9. \TeX's Roman Fonts. 51.
\\10. Dimensions. 57.
\\11. Boxes. 63.
\\12. Glue. 69.
\\13. Modes. 85.
\\14. How \TeX\ Breaks Paragraphs Into Lines. 91.
\\15. How \TeX\ Makes Lines Into Pages. 109.
\\16. Typing Math Formulas. 127.
\\17. More about Math. 139.
\\18. Fine Points of Mathematics Typing. 161.
\\19. Displayed Equations. 185.
\\20. Definitions (also called Macros). 199.
\\21. Making Boxes. 221.
\\22. Alignment. 231.
\\23. Output Routines. 251.
\eject
\vbox to 8pc{}
\\24. Summary of Vertical Mode. xxx.
\\25. Summary of Horizontal Mode. xxx.
\\26. Summary of Math Mode. xxx.
\\27. Recovery from Errors. xxx.
\null
\leftline{\indent\bf Appendixes}
\\A. Answers to All the Exercises. xxx.
\\B. Basic Control Sequences. xxx.
\\C. Character Codes. xxx.
\\D. Dirty Tricks. xxx.
\\E. Example Formats. xxx.
\\F. Font Tables. xxx.
\\G. Generating Boxes from Formulas. xxx.
\\H. Hyphenation. xxx.
\\I\hskip 1pt. Index. xxx.
\\J. Joining the \TeX\ Community. xxx.
\null % 17 lines so far to balance the 23 on the other page
\null % 18
\null % 19
\null % 20
\null % 21
\null % 22
\null % 23
\eject
\endgroup
\beginchapter Chapter 1. The Name of\\the Game
\setcount0=1 % This is page number 1, number 1,
English words like `technology' stem from a Greek root beginning with
the letters $\tau\epsilon\chi\ldots\,$; and this same Greek word means {\sl
art\/} as well as technology. Hence the name \TeX, which is an
upper-case form of $\tau\epsilon\chi$.↑(TeX {(actually \TeX)}, meaning of)
↑(:tau)↑(:epsilon)↑(:chi)
Insiders pronounce the $\chi$ of \TeX\ as a Greek chi, not as an `x', so that
\TeX\ rhymes with the word blecchhh. It's the `ch' sound in Scottish words
like {\sl loch\/} or German words like {\sl ach\/}; it's a Spanish `j' and a
Russian `kh'. When you say it correctly to your computer, the terminal
may become slightly moist.
The purpose of this pronunciation exercise is to remind you that \TeX\ is
primarily concerned with high-quality technical manuscripts: its emphasis is
on art and technology, as in the underlying Greek word. If you merely want
to produce a passably good document---something acceptable and basically
readable but not really beautiful---a simpler system will usually suffice.
With \TeX\ the goal is to produce the {\sl finest\/} quality; this requires
more attention to detail, but you will not find it much harder to go the
extra distance, and you'll be able to take special pride in the finished
product.
On the other hand, it's important to notice another thing about \TeX's name:
There's something funny about the `E', which is out of kilter. This ↑(logo)
displaced `E' is a reminder that \TeX\ is about typesetting, and it
distinguishes \TeX\ from other system names. In fact, `↑{TEX}' (pronounced
{\sl tecks\/}) is the admirable {\sl Text EXecutive\/} processor developed by
↑{Honeywell Information Systems}. Since these two system names are
pronounced quite differently, they should also be spelled differently. The
correct way to refer to \TeX\ in a computer file, or when using some other
medium that doesn't allow lowering of the `E', is to type `↑{.TeX}'. Then
there will be no confusion with other similar names, and people will be
primed to pronounce everything properly.
\exercise After you have mastered the material in this book, what will
you be: A \TeX pert, or a \TeX nician?
\answer A \TeX nician (underpaid); sometimes also called a \TeX acker.
\endchapter
They do certainly give
very strange and new-fangled names to diseases.
\author ↑{PLATO}, {\sl The Republic}, Book 3 (c.\ 375 B.C.) % 405c
\bigskip
Technique! The very word is like the shriek
Of outraged Art. It is the idiot name
Given to effort by those who are too weak,
Too weary, or too dull to play the game.
\author LEONARD ↑{BACON}, {\sl Sophia Trenton\/} (1920) % composed at Stanford
\eject
\beginchapter Chapter 2. Book Printing\\versus\\Ordinary Typing
When you first started using a computer terminal, you probably had to adjust
to the difference between the digit `1' and the lower-case letter `l'.
When you take the next step to the level of typography that is common in
book publishing, a few more adjustments of the same kind need to be made;
your eyes and your fingers need to learn to make a few more distinctions.
In the first place, there are two kinds of ↑{quotation marks} in books, but
only one kind on the typewriter. Even your computer terminal, which has
more characters than an ordinary typewriter, probably has only a
non-oriented double-quote mark (|"|), because the standard ``↑{ascii}'' code
for computers was not invented with book publishing in mind. However, your
terminal probably does have two flavors of single-quote marks, namely |`|
and |'|; the second of these is useful also as an ↑{apostrophe}.
American keyboards usually contain a left-quote character that shows up
as something like {\tt\char'15}, and an apostrophe or right-quote that
looks like {\tt\char'177} or {\tt\char'16}.
To produce double-quote marks with \TeX, you simply type two single-quote marks
of the appropriate kind. For example, to get the phrase
$$\displaybox{``I understand.''}$$
(including the quotation marks) you should type
\begintt
``I understand.''
\endtt
to your computer.
A typewriter-like style of type will be used throughout this manual to indicate
\TeX\ constructions that you might type on your terminal, so that the
symbols actually typed are readily distinguishable from the output \TeX\ would
produce and from the comments in the manual itself. Here are the symbols to be
used in the examples:
\begintt
ABCDEFGHIJKLMNOPQRSTUVWXYZ
abcdefghijklmnopqrstuvwxyz
0123456789"#$%&@*+-=,.:;?!
()<>[]{}`'\|vrt/_↑~
\endtt
If these are not all on your computer terminal, don't despair; \TeX\ can make
do with the ones you have. An additional symbol
$$\displaybox{\]}$$
is also
used to stand for a {\sl ↑{blank space}}, in case it is important to
emphasize that a blank space is being typed;
thus, what you {\sl really\/} type in the example above is
\begintt
``I|]understand.''
\endtt
Without such a symbol you would have
difficulty seeing the invisible parts of certain constructions. But we
won't be using `\]' very often, because spaces are usually visible enough.
Another important distinction between book printing and ordinary typing is
the use of ↑{dashes}, ↑{hyphens}, and ↑{minus signs}. In good math books,
these symbols are all different; in fact there are usually at least four
different symbols:
$$\obeylines\vbox{
a hyphen (-);
an en-dash (--);
an em-dash (---);
a minus sign ($-$).}$$
Hyphens are used for compound words like `daughter-in-law' and `X-rated'.
↑{En-dash}es are used for number ranges like `pages 13--34', and also in
contexts like `exercise 1.2.6--52'. ↑{Em-dash}es are used for punctuation in
sentences---they are what we often call simply dashes. And minus signs are
used in formulas. A conscientious user of \TeX\ will be careful to distinguish
these four usages, and here is how to do it:
$$\obeylines\vbox{
for a hyphen, type a hyphen (|-|);
for an en-dash, type two hyphens (|--|);
for an em-dash, type three hyphens (|---|);
for a minus sign, type a hyphen in mathematics mode (|$-$|).}$$
(Mathematics mode occurs between dollar signs; it is discussed later, so you
needn't worry about it now.)
\exercise Explain how to type the following sentence to \TeX: Alice said,
``I always use an en-dash instead of a hyphen when specifying page numbers
like `480--491' in a ↑{bibliography}.''
\answer |Alice said, ``I always use an en-dash instead of a hyphen|\ttspace
|when specifying page numbers like `480--491' in a bibliography.''| \
(The wrong answer to this question ends with |'480-49l' in a bibliography."|)
\exercise What do you think happens when you type four hyphens in a row?
\answer You get em-dash and hyphen (----), which looks awful.
If you look closely at most well-printed books, you will find that certain
combinations of letters are treated as a unit. For example, this is true
of the `f' and the `i' of `find'. Such combinations are called {\sl
↑{ligatures}}, and professional typesetters have traditionally been
trained to watch for letter combinations such as |ff|, |fi|, |fl|, |ffi|, and
|ffl|. \ (The reason is that words like `f{}ind' don't look very good in
most styles of type unless a ligature is substituted for the letters that
clash. It's somewhat surprising how often the traditional ligatures appear
in English; other combinations are important in other languages.)
\exercise Think of an English word that contains two ligatures.
\answer fluffier firefly fisticuffs, flagstaff fireproofing,
chiffchaff and riffraff.
The good news is that you do
{\sl not\/} have to concern yourself with ligatures: \TeX\ is perfectly
capable of handling such things by itself, using the
same mechanism that converts `|--|' into `--'. In fact, \TeX\ will also look
for combinations of adjacent letters (like `|A|' next to `|V|'\thinspace)
that ought to be moved closer together for better appearance; this is
called {\sl ↑{kerning}}.
\medbreak
To summarize this chapter: When using \TeX\ for straight copy, you type
the copy as on an ordinary typewriter, except that you need to be careful
about quotation marks, the number 1, and various kinds of hyphens/dashes.
\TeX\ will automatically take care of other niceties like ligatures and
kerning.
\danger (Are you sure you should be reading this paragraph? The
``↑{dangerous bend}'' sign here is meant to warn you about material that
ought to be skipped on first reading. And maybe also on second reading.
The reader-beware paragraphs sometimes refer to concepts that aren't
explained until later chapters.)
\danger If your keyboard does not contain a left-quote symbol, you can
type `↑{:lq}', followed by a space if the next character is a letter, or
followed by a `|\|' if the next character is a space. Similarly, `↑{:rq}'
yields a right-quote character. Is that clear?
\begintt
\lq\lq I understand.\rq\rq\|]
\endtt
\danger In case you need to type ↑{quotes within quotes}, for example a
single quote followed by a double quote, you can't simply type
\thinspace|'''|\thinspace\
because \TeX\ will interpret this as ''' (namely, double-quote followed by
single-quote). If you have already read Chapter@5, you might expect that
the solution will be to use grouping---namely, to type something like
\thinspace|{'}''|. But it turns out that this doesn't produce the
desired result, because there is usually less space following a single quote
than there is following a double quote: What you get is {'}'', which is indeed
a single quote followed by a double quote (if you look at it closely enough),
but it looks almost like three equally-spaced single quotes.
On the other hand, you certainly won't want to type \thinspace|'|\]|''|,
because that space is much too large---it's just as large as the space between
words---and \TeX\ might even start a new line at such a space when making
up a paragraph! The solution is to type \thinspace|'\thinspace''|, which
produces '\thinspace'' as desired.↑(:thinspace)
\dangerexercise OK, now you know how to produce ''' and '\thinspace'';
how do you get ``\thinspace` and `{}``\thinspace?
\answer |``\thinspace`|; and either |`{}``| or |{`}``| or something similar.
\dangerexercise Why do you think the author introduced the control
sequence |\thinspace| to solve the adjacent-quotes problem, instead of
recommending the trickier construction |'$\,$''| (which also works)?
\answer Eliminating ↑{:thinspace} would mean that a user need not learn
the term; but it is not advisable to minimize terminology by ``overloading''
math mode with tricky constructions. For example, a user who wishes to
take advantage of \TeX's ↑{:mathsurround} feature would be thwarted by
non-mathematical uses of dollar signs. \ (Incidentally, neither |\thinspace|
nor ↑{:,} is built into \TeX; both are defined in terms of more
primitive features, in Appendix@B.)
\endchapter
In modern Wit all printed Trash, is
Set off with num'rous\/ {\rm Breaks}\raise.5ex\vbox{\hrule width 2em}%
and\/ {\rm Dashes}\raise.5ex\vbox{\hrule width 1em}
% no period after the em-dash: stet!
% Sir Walter Scott ruined this quote in his edition of Swift!
\author JONATHAN ↑{SWIFT}, {\sl On Poetry: A Rapsody\/} (1733)
% Rapsody: stet!
\bigskip
Some compositors still object to work
in offices where type-composing machines are introduced.
\author WILLIAM STANLEY ↑{JEVONS}, {\sl Political Economy\/} (1878) % sec 55
% "They are all afraid that if the work is done too easily and rapidly,
% they will not be wanted to do it."
% Jevons goes on to say that justifying and page makeup can't be done
% profitably by machines, so the employees needn't fear losing their jobs.
\eject
\beginchapter Chapter 3. Controlling\\\TeX
Your keyboard has very few keys compared to the large number of symbols that you
may want to specify. In order to make a limited keyboard sufficiently versatile,
one of the characters that you can type is reserved for special use, and
it is called the {\sl ↑{escape character}}. Whenever you want to type
something that controls the format of your manuscript, or something that
doesn't use the keyboard in the ordinary way, you should type the escape
character followed by an indication of what you want to do.
Note: Some computer terminals have a key marked `|ESC|', but that is {\sl not\/}
your escape character! It is a key that sends a special message to the operating
system, so don't confuse it with what this manual calls ``escape''.
\TeX\ allows any character to be used for escapes, but the ``↑{backslash}''
character `|\|' is usually adopted for this purpose, since backslashes are
reasonably convenient to type and they are rarely needed in ordinary text.
Things work out best when different \TeX\ users do things consistently,
so we shall escape via backslashes in all the examples of this manual.
Immediately after typing `|\|' (i.e., immediately after an escape
character) you type a coded command telling \TeX\ what you have in mind.
Such commands are called {\sl ↑{control sequences}}. For example, you might type
↑(markup commands, see control sequences)
\begintt
\input MS
\endtt
which (as we will see later) causes \TeX\ to begin reading a file called
`|MS.tex|'; the string of characters `↑{*input}' is a control sequence.
Here's another example:
\begintt
George P\'olya and Gabor Szeg\"o.
\endtt
\TeX\ converts this to `George P\'olya and Gabor Szeg\"o.' There are two
↑(Polya)↑(Szego)
control sequences, ↑{:'} and ↑{:"}, here; these control sequences
have been used to place accents over some of the letters.
Control sequences come in two flavors. The first kind, like |\input|,
is called a {\sl↑{control word}\/}; it
consists of an escape character followed by one or more {\sl letters}, followed
by a space or by something besides a letter. \ (\TeX\ has to know where the
control sequence ends, so you must put a space after a control word if
the next character is a letter. For example, if you type `|\inputMS|',
\TeX\ will naturally interpret this as a control word with seven
letters.) \ In case you're wondering what a ``↑{letter}'' is, the answer
is that \TeX\ normally regards the 52 symbols |A...Z| and |a...z| as
letters. The digits |0...9| are {\sl not\/} considered to be
letters, so they don't appear in control sequences of the first kind.
A control sequence of the other kind, like |\'|, is called a {\sl
↑{control symbol}\/}; it consists of the escape
character followed by a single {\sl nonletter}. In this case you don't
need a space to separate the control sequence from a letter that follows,
since control sequences of the second kind always have exactly one
symbol after the escape character.
\exercise What are the control sequences in `|\I'm \exercise3.1\\!|'\thinspace?
\answer |\I|, |\exercise|, and |\\|. (The last of these is of type@2, i.e.,
a control symbol, since the second backslash is not a letter; the first
backslash keeps the second one from starting its own control sequence.)
\exercise We've seen that the input |P\'olya| yields `P\'olya'. Can
you guess how the French words `math\'ematique' and `centim\`etre'
should be specified?
\answer |math\'ematique| and |centim\`etre|.↑(:')↑(:`)
When a space comes after a control word (an all-letter control
sequence), it is ignored by
\TeX; i.e., it is not considered to be a ``real'' space belonging to the
manuscript that is being typeset. But when a space comes after a control
symbol, it's truly a space.
Now the question arises, what do you do if you actually {\sl want\/} a
space to appear after a control word? We will see later that \TeX\
treats two or more consecutive spaces as a single ↑{space}, so the answer
is {\sl not\/} going to be ``type two spaces.'' The correct answer is to
type ``escape space,'' ↑(*{ }) namely
\begintt
\|]
\endtt
(the escape character followed by a blank space); \TeX\ will treat this as
a space that is not to be ignored. Notice that escape-space is a control
sequence of the second kind, i.e., a control symbol, since there is a
single nonletter (\]) following the escape character. Two consecutive
spaces are considered to be equivalent to a single space, so further
spaces immediately following |\|\] will be ignored; but if you want to
enter, say, three consecutive spaces into a manuscript you can type
`|\|\]|\|\]|\|\]'. Incidentally, typists are often taught to put two
spaces at the ends of sentences; but we will see later that \TeX\ has its
own way to produce extra space in such cases. Thus you needn't be
consistent in the number of spaces you type.
\danger Nonprinting control characters like \<return> might follow
an escape character, and these lead to distinct control sequences according
to the rules. \TeX\ is initially set up to treat |\|\<return> and
|\|\<tab> ↑(:return>)↑(:tab>)↑
the same as |\|\] (escape space); these special control sequences
should probably not be redefined, because you can't see the difference
between them when you look at them in a file.
↑(carriage-return, see <return>)
It is usually unnecessary for you to use ``escape space,'' since control
sequences aren't often needed at the ends of words. But here's an example
that might shed some light on the matter: This manual itself has been
typeset by \TeX, and one of the things that occurs fairly often is the
tricky ↑{logo} `\TeX', which requires backspacing and lowering the E.
There's a special control word
\begintt
\TeX
\endtt
that produces the half-dozen or so instructions necessary to typeset `\TeX'.
When a phrase like `\TeX\ ignores spaces after control words.' is
desired, the manuscript renders it as follows:
\begintt
\TeX\ ignores spaces after control words.
\endtt
Notice the extra |\| following |\TeX|; this produces the escape-space
that is necessary because \TeX\ ignores spaces after control words.
Without this extra |\|, the result would have been
$$\displaybox{\TeX ignores spaces after control words.}$$
On the other hand, you can't simply put |\| after |\TeX| in all contexts.
For example, consider the phrase
\begintt
the logo `\TeX'.
\endtt
In this case an extra backslash doesn't work at all; in fact,
you get a curious result if you type
\begintt
the logo `\TeX\'.
\endtt
Can you guess what happens? \ Answer: The |\'| is a control sequence denoting
an acute accent, as in our |P\'olya| example above; the effect is
therefore to put an accent over the next nonblank character,
which happens to be a period. In other words, you get an accented
period, and the result is
$$\displaybox{the logo `\TeX\'.}$$
Computers are good at following instructions, but not at reading your mind.
\TeX\ understands about 900 control sequences as part of its built-in
vocabulary, and all of them are explained in this manual somewhere. But
you needn't worry about learning so many different things, because you won't
really be needing very many of them unless you are faced with unusually
complicated copy. Furthermore, the ones you do need to learn actually fall into
relatively few categories, so they can be assimilated without great difficulty.
For example, many of the control sequences are simply the names of special
characters used in math formulas; you type `↑{:pi}'@to get@`$\pi$',
`↑{:Pi}'@to get@`$\Pi$',
`↑{:aleph}'@to get@`$\aleph$',
`↑{:infty}'@to get@`$\infty$',
`↑{:le}'@to get@`$\le$',
`↑{:ge}'@to get@`$\ge$',
`↑{:ne}'@to get@`$\ne$',
`↑{:oplus}'@to get@`$\oplus$',
`↑{:otimes}'@to get@`$\otimes$'.
Appendix@F contains several tables of such symbols.
\danger There's no built-in relationship between ↑{upper-case} and ↑{lower-case}
letters in control sequence names. For example, `|\pi|' and `|\Pi|'
and `|\PI|' and `|\pI|' are four different control words.
The 900 or so control sequences that were just mentioned actually aren't
the whole story, because it's easy to define more. For example, if you
want to substitute your own favorite names for math symbols, so that you
can remember them better, you're free to go right ahead and do it;
Chapter@20 explains how.
About 300 of \TeX's control sequences are called {\sl ↑{primitive}\/}; these
are the low-level atomic operations that are not decomposable into simpler
functions. All other control sequences are defined, ultimately, in terms
of the primitive ones. For example, ↑{*input} is a primitive operation,
but ↑{:'} and ↑{:"} are not; the latter are defined in terms of an
↑{*accent} primitive.
People hardly ever use \TeX's primitive control sequences in their
manuscripts, because the primitives are $\ldots$ well $\ldots$ so
{\sl primitive}. You have to type a lot of instructions when you are
trying to make \TeX\ do low-level things; this takes time and invites
mistakes. It is generally better to make use of higher-level control
sequences that state what functions are desired, instead of typing
out the way to achieve each function each time. The higher-level control
sequences need to be defined only once in terms of primitives. For
example, |\TeX| is a control sequence that means ``typeset the \TeX\ logo'';
|\'| is a control sequence that means ``put an acute accent over the
next character''; and both of these control sequences might require different
combinations of primitives when the style of type changes. If \TeX's logo
were to change, the author would simply have to change one definition, and the
changes would appear automatically wherever they were needed. By contrast,
an enormous amount of work would be necessary to change the logo if it
were specified as a sequence of primitives each time.
At a still higher level, there are control sequences that govern the
overall format of a document. For example, in the present book the author
typed `↑{:exercise}' just before stating each exercise; this |\exercise|
command was programmed to make \TeX\ do all of the following things:
\nobreak\medskip
\item\bull compute the exercise number (e.g., `3.2' for the second
exercise in Chapter@3);
\smallskip
\item\bull typeset `\thinspace{\manual\char'170\hskip.15em
\ninebf EXERCISE \bf3.2}' with the appropriate typefaces, on a line by
itself, and with the triangle sticking out in the left margin;
\smallskip
\item\bull leave a little extra space just before that line, or begin
a new page at that line if appropriate;
\smallskip
\item\bull prohibit beginning a new page just after that line;
\smallskip
\item\bull suppress indentation on the following line.
\medbreak\noindent
It is obviously advantageous to avoid typing all of these individual
instructions each time. And since the manual is entirely described in
terms of high-level control sequences, it could be printed in a radically
different format simply by changing a dozen or so definitions.
% and sweating over the page layout in the math and alignment chapters!
\danger How can a person distinguish a \TeX\ primitive from a control sequence
that has been defined at a higher level? There are two ways: \ (1)@The index
to this manual lists all of the control sequences that are discussed, and each
primitive is marked with an asterisk. \ (2)@You can display the meaning of a
control sequence while running \TeX. If you type `↑{*show}|\cs|'
where |\cs| is any control sequence, \TeX\ will respond with its current
meaning. For example, `|\show\input|' results in \hbox{`|> \input=\input.|'},
because |\input| is primitive. On the other hand, `|\show|↑{:thinspace}' yields
\begintt
> \thinspace=macro:
->\kern .16667em .
\endtt
This means that |\thinspace| has been defined as `|\kern .16667em|'; by
typing `|\show|\penalty0|\kern|' you can verify that ↑{*kern} is primitive. The
results of\/ |\show| appear on your terminal and in the ↑{log file} that
you get after running \TeX.
\dangerexercise Which of the control sequences |\|\] and
|\|\<return> is primitive?
\answer According to the index, |\|\] is primitive but
|\|\<return> isn't. The command `|\def\↑↑M{\ }|' in
Appendix@B is what actually defines |\|\<return>, since a
return is representable as |↑↑M|. Asking \TeX\ to |\show\↑↑M|
\looseness-1
produces the response `|>| |\↑↑M=macro:->\| |.|'.
In the following chapters we shall frequently discuss ``↑{plain \TeX}'' format,
which is a set of about 600 ↑{basic control sequences} that are defined in
Appendix@B\null. These control sequences, together with the 300 or so
primitives, are usually present when \TeX\ begins to process a manuscript;
that is why \TeX\ claims to know roughly 900 control sequences when it starts.
We shall see how plain \TeX\ can be used to create documents in a format
that meets many people's needs, using some typefaces that come with the
\TeX\ system. However, you should keep in mind that plain \TeX\ is only one
of countless ↑{formats} that can be designed on top of \TeX's primitives;
if you want some other format, it will usually be possible to adapt \TeX\
so that it will handle whatever you have in mind. The best way to learn
is probably to start with plain \TeX\ and to change its definitions,
little by little, as you gain more experience.
\danger Appendix@E contains examples of formats that can be added to
Appendix@B for special applications; for example, there is a set of
definitions suitable for business correspondence. A complete specification
of the format used to typeset this manual also appears in Appendix@E\null.
Thus, if your goal is to learn how to design \TeX\ formats, you will
probably want to study Appendix@E while mastering Appendix@B\null. After you
have become skilled in the lore of control-sequence definition, you
will probably have developed some formats that other people will want
to use; you should then write a supplement to this manual, explaining
your style rules.
The main point of these remarks, as far as novice \TeX\ users are concerned, is
that it is indeed possible to define nonstandard \TeX\ control sequences.
When this manual says that something is part of ``plain \TeX,'' it means
that \TeX\ doesn't insist on doing things exactly that way; a person
could change the rules by changing one or more of the definitions in
Appendix@B\null. But you can safely rely on the control sequences of plain \TeX\
until you become an experienced \TeX nical@typist.
\ddangerexercise How many different control sequences of length@2
(including the escape character) are possible? How many of length@3?
\answer There are 128 of length@2; most of these are undefined when \TeX\
begins. \ (\TeX\ allows any character to be an escape, but it does not
distinguish between control sequences that start with different escape
characters.) \
If we assume that there are 52 letters, there are exactly $52↑2$
possible control sequences of length@3 (one for each pair of letters, from
|AA| to |zz|). But Chapter@7 explains how to use ↑{*catcode} to change any
character into a ``↑{letter}''; therefore it's possible to use any of
$128↑2$ potential control sequences of length@3.
\endchapter
Syllables govern the world.
\author JOHN ↑{SELDEN}, {\sl Table Talk\/} (1689) % section on Power
\bigskip
I claim not to have controlled events,
but confess plainly that events have controlled me.
\author ABRAHAM ↑{LINCOLN} (1864) % letter to A. G. Hodges, April 4
\eject
\beginchapter Chapter 4. Fonts\\of Type
Occasionally you will want to change from one ↑{typeface} to another, for
example if you wish to be {\bf ↑{bold}} or to {\sl emphasize\/} something.
\TeX\ deals with sets of up to 256 characters called ``↑{fonts}'' of type,
and control sequences are used to select a particular font. For example,
you could specify the last few words of the first sentence above
in the following way, using the plain \TeX\ format of Appendix@B:
\begintt
to be \bf bold \rm or to \sl emphasize \rm something.
\endtt
Plain \TeX\ provides the following control sequences for changing fonts:
$$\halign{\indent#\hfil\qquad&#\hfil\cr
|\rm| switches to the normal ``roman'' typeface:&Roman\cr
|\sl| switches to a slanted roman typeface:&\sl Slanted\cr
|\it| switches to italic style:&\it Italic\cr
|\tt| switches to a typewriter-like face:&\tt Typewriter\cr
|\bf| switches to an extended boldface style:&\bf Bold\cr}$$
↑(:rm)↑(:sl)↑(:it)↑(:tt)↑(:bf)↑(typewriter type)↑(face)
At the beginning of a run you get ↑{roman type} (|\rm|) unless you specify
otherwise.
Notice that two of these faces have an ``↑{oblique}'' slope for emphasis:
{\sl ↑{Slanted type} is essentially the same as roman, but the letters are
slightly skewed, \it while the letters in ↑{italic type} are drawn in a
different style.} \ (You can perhaps best appreciate the difference between
the roman and italic styles by contemplating {\tenu letters that are
in an unslanted italic face.}) \ Typographic conventions are presently
in a state of transition, because new technology has made it possible to
do things that used to be prohibitively expensive; people are wrestling
with the question of how much to use their new-found typographic freedom.
Slanted roman type was introduced in the 1930s, but it first became widely
used as an alternative to the conventional italic during the late 1970s.
It can be bene\-ficial in mathematical texts, since slanted
letters are distinguishable from the italic letters in math formulas.
The double use of italic type for two different purposes---for example,
when statements of theorems are italicized as well as the names of variables in
those theorems---has led to some confusion, which can now be
avoided with slanted type. People are not generally agreed about the relative
merits of slanted versus italic, but slanted type is rapidly becoming a
favorite for the titles of books and journals in bibliographies.
Special fonts are effective for emphasis, but not for sustained reading;
your eyes would tire if long portions of this manual were entirely set in
a bold or slanted or italic face. Therefore roman type accounts for the
bulk of most typeset material. But it's a nuisance to say `|\rm|' every
time you want to go back to the roman style, so \TeX\ provides an easier
way to do it, using ``↑{curly brace}↑(brace)'' symbols: You can switch
fonts inside the special symbols |{| and |}|, without affecting the fonts
outside. For example, the displayed phrase at the beginning of this
chapter is usually rendered
\begintt
to be {\bf bold} or to {\sl emphasize} something.
\endtt
This is a special case of the general idea of ``↑{grouping}'' that we shall
discuss in the next chapter. It's best to forget about the first way of
changing fonts, and to use grouping instead; then your \TeX\ manuscripts
will look more natural, and you'll probably
never\footnote*{Well \dots, hardly ever.} have to type `|\rm|'.
\exercise Explain how to type the bibliographic reference `Ulrich ↑{Dieter},
{\sl Journal f\"ur die reine und angewandte Mathematik\/ \bf201} (1959),
37--70'. [Use grouping.]
\answer |Ulrich Dieter, {\sl Journal f\"ur die reine und angewandte|\hfil
\break
|Mathematik\/ \bf201} (1959), 37--70|. \ (It's convenient to use a single
group for both |\sl| and |\bf| here. The `|\/|' is a refinement
that you might not understand until you read the rest of Chapter@4.)
We have glossed over an important aspect of quality in the preceding
discussion. Look, for example, at the {\it italicized} and {\sl slanted}
words in this sentence. Since italic and slanted styles slope to the right,
the d's stick into the spaces that separate these words from the roman
type that follows; as a result, the spaces appear to be too skimpy,
although they are correct at the base of the letters. To equalize the
effective white space, \TeX\ allows you to put the special control sequence
`↑{*/}' just before switching back to unslanted letters. When you type
\begintt
{\it italicized\/} and {\sl slanted\/} words
\endtt
you get {\it italicized\/} and {\sl slanted\/} words that look better.
The `|\/|' tells \TeX\ to add an\break % makes the line tighter, to be fair
``{\sl↑{italic correction}\/}'' to the
previous letter, depending on that letter; this correction is about four
times as much for an `$f$' as for a `$c$', in a typical italic font.
Sometimes the italic correction is not desirable, because other factors take
up the visual slack. The standard rule of thumb is to use |\/| just before
switching from slanted or italic to roman or bold, unless the next
character is a period or comma. For example, type
\begintt
{\it italics\/} for {\it emphasis}.
\endtt
Old manuals of style say that the ↑{punctuation} after a word should be in the
{\it same\/} font as that {\it word;\/} but an italic semicolon often looks
wrong, so this convention is changing. When an italicized word occurs
just before a semicolon, the author recommends typing `|{\it word\/};|'.
\exercise {\it Explain how to typeset a\/ {\rm roman} word in the midst
of an italicized sentence.}
\answer |{\it Explain ... typeset a\/ {\rm roman} word ... sentence.}|
Note the position of the italic correction in this case.
\danger Every letter of every font has an italic correction, which you can
bring to life by typing |\/|. The correction is usually zero in unslanted
styles, but there are exceptions: To typeset a bold `{\bf f\/}' in quotes,
you should say |a| |bold| \hbox{|`{\bf f\/}'|}, lest you get a bold `{\bf f}'.
\ddangerexercise Define a control sequence |\ic| such that `|\ic c|' puts the
italic correction of character $c$ into \TeX's register |\dimen0|.
\answer |\def\ic#1{\setbox0=\hbox{#1\/}\setdimen0=1wd0|\parbreak
|\setbox0=\hbox{#1}\advdimen0 by -1wd0}|.
\ddanger The primitive control sequence ↑{*nullfont} stands for a font that
has no characters. This font is always present, in case you haven't
specified any others.
Fonts vary in size as well as in shape. For example, the font you are now
reading is called a ``10-point'' font, because certain features of its
design are 10 ↑{points} apart, when measured in printers' units. \ (We
will study the point system later; for now, it should suffice to point out
that the parentheses around this sentence are exactly 10 points tall---and
the em-dash is just 10 points wide.) \ The ``↑{dangerous bend}''
sections of this manual are set in 9-point type, the foot\-notes in 8-point,
↑{subscripts} in 7-point or 6-point, sub-subscripts in 5-point.
Each font used in a \TeX\ manuscript is associated with a control sequence;
for example, the 10-point font in this paragraph is called ↑{:tenrm}, and
the corresponding 9-point font is called ↑{:ninerm}. The slanted fonts that
match |\tenrm| and |\ninerm| are called ↑{:tensl} and ↑{:ninesl}. These
control sequences are not built into \TeX, nor are they the actual names
of the fonts; \TeX\ users are just supposed to make up convenient names,
whenever new fonts are introduced into a manuscript. Such control
sequences are used to change typefaces.
When fonts of different sizes are used simultaneously, \TeX\ will line the
letters up according to their ``↑{baseline}s.'' For example, if you type
\begintt
\tenrm smaller \ninerm and smaller
\eightrm and smaller \sevenrm and smaller
\sixrm and smaller \fiverm and smaller \tenrm
\endtt
the result is {smaller \ninerm and smaller \eightrm and smaller
\sevenrm and smaller \sixrm and smaller \fiverm and smaller}. Of course
this is something that authors and readers aren't accustomed to, because
printers couldn't do such things with traditional lead types. Perhaps
poets who wish to speak in {\fiverm a still small voice} will cause future
books to make use of frequent font variations, but nowadays it's only
an occasional font freak {\fiverm(like the author of this manual)} who
likes such experiments. One should not get too carried away by the prospect
of font switching unless there is good reason.
An alert reader might well be confused at this point because we started out
this chapter by saying that `|\rm|' is the command that switches to roman
type, but later on we said that `|\tenrm|' is the way to do it. The truth
is that both ways work. But it has become customary to set things up so that
|\rm| means ``switch to roman type in the current size'' while |\tenrm| means
``switch to roman type in the 10-point size.'' In plain \TeX\ format, nothing
but 10-point fonts are provided, so |\rm| will always get you |\tenrm|; but
in more complicated formats the meaning of\/ |\rm| will change in different
parts of the manuscript. For example, in the format used by the author to
typeset this manual, there's a control sequence `↑{:tenpoint}' that causes
|\rm| to mean |\tenrm|, |\sl| to mean |\tensl|, and so on, while
`↑{:ninepoint}' changes the definitions so that |\rm| means |\ninerm|,
etc. There's another control sequence used to introduce the quotations at
the end of each chapter; when the quotations are typed, |\rm| and |\sl|
temporarily stand for {\eightss 8-point unslanted sans-serif type} and
{\eightsss 8-point slanted sans-serif type}, respectively. This device of
constantly redefining the abbreviations |\rm| and |\sl|, behind the
scenes, frees the typist from the need to remember what size or style of
type is currently being used.
\exercise Why do you think the author chose the names `|\tenpoint|' and
`|\tenrm|', etc., instead of `|\10point|' and `|\10rm|'\thinspace?
\answer Control word names are made of letters, not digits.
\danger Each font has an external name that identifies it with respect to
all other fonts in a particular library. For example, the font in this
sentence is called `|cmr9|', which is an abbreviation for ``↑{Computer
Modern} Roman 9@point.'' ↑(cm fonts) In order to prepare \TeX\ for
using this font, the command
\begintt
\font\ninerm=cmr9
\endtt
appears in Appendix@E\null. In general you say `↑{*font}|\cs=|\<external
font name>' to load the information about a particular font into \TeX's
memory; afterwards the control sequence |\cs| will select that font for
typesetting. Plain \TeX\ makes only a few fonts available initially (see
Appendix@F\null), but you can use |\font| to access anything that exists
in your system's font library.
\danger It is often possible to use a font at several different sizes, by
magnifying or shrinking the character images. Each font has a so-called
↑{design size}, which reflects the size it normally has by default; for
example, the design size of |cmr9| is 9@points. But on many systems there is
also a range of sizes at which you can use a particular font, by scaling its
dimensions up or down. To load a scaled font into \TeX's memory, you
simply say `|\font\cs=|\<external font name> ↑{.at} \<desired size>'.
For example, the command
\begintt
\font\magnifiedfiverm=cmr5 at 10pt
\endtt
brings in 5-point Computer Modern Roman at twice its normal size. \ (Caution:
Before using this `|at|' feature, you should check to make sure that your
typesetter supports the font at the size in question; \TeX\ will accept any
\<desired size> that is positive and less than 2048 points, but the final
output will not be right unless the scaled font really is available on your
printing device.)
\danger What's the difference between |cmr5| |at| |10pt| and the normal
10-point font, |cmr10|? Plenty; a well-designed font will be drawn
differently at different point sizes, and the letters will often have
different relative heights and widths, in order to enhance readability.
$$\displaybox{\tenrm Ten point type is different from%
\magnifiedfiverm\ magnif{}ied f{}ive-point type.}$$
It is usually best to scale fonts only slightly with respect to
their design size, unless the final product is going to be photographically
reduced after \TeX\ has finished with it, or unless you are trying for an
unusual effect.↑(magnification)↑(reduction)
\dangerexercise Suppose that you have typed a manuscript using slanted type for
emphasis, but your editor suddenly tells you to change all the slanted to
italic. What's an easy way to do this?
\answer Say |\def\sl{\it}| at the beginning, and delete other definitions
of |\sl| that might be present in your format file (e.g., there might be
one inside a |\tenpoint| macro).
\endchapter
Type faces---like people's faces---have distinctive features
indicating aspects of character. % I don't think he was kidding
\author MARSHALL ↑{LEE}, {\sl Bookmaking\/} (1965) % page 83
\bigskip
This was the Noblest Roman of them all.
\author WILLIAM ↑{SHAKESPEARE}, {\oldeightsss The Life and Death %
of Julius C\ae sar\/} (1599) % Act V, Scene 5, line 68
% For Shakespeare I'm using the spelling from First Folio (1623)
% but act/line numbers from The Riverside Shakespeare (throughout)
\eject
\beginchapter Chapter 5. Grouping
Every once in a while it is necessary to treat part of a manuscript as a
unit, so you need to indicate somehow where that part begins and where it
ends. For this purpose \TeX\ gives special interpretation to two
``↑{grouping characters},'' which (like the escape character) are
treated differently from the normal symbols that you type. We assume in
this manual that |{| and |}| are the grouping characters, since they
are the ones used in plain \TeX. ↑(curly braces, see braces)
We saw examples of grouping in the previous chapter, where it was mentioned
that font changes inside a group do not affect the fonts in force outside.
The same principle applies to almost anything else that is defined inside
a group, as we will see later; for example, if you define a control sequence
within some group, that definition will disappear when the group ends.
In this way you can conveniently instruct \TeX\ to do something unusual,
by changing its normal conventions temporarily inside of a group; since
the changes are invisible from outside the group, there is no need to worry
about messing up the rest of a manuscript by forgetting to restore the
normal conventions when the unusual construction has been finished.
Computer scientists have a name for this aspect of grouping, because it's
an important aspect of programming languages in general; they call it
``↑{block structure},'' and definitions that are in force only within
a group are said to be ``↑{local}'' to that group.
You might want to use grouping even when you don't care about block
structure, just to have better control over spacing. For example, let's
consider once more the control sequence ↑{:TeX} that produces
the logo `\TeX' in this manual: We observed in Chapter@3 that a blank space
after this control sequence will be gobbled up unless one types
`\hbox{|\TeX\ |}', yet it is a mistake to say `|\TeX\|' when the following
character is not a blank space. Well, in {\sl all\/} cases it would be
correct to specify the simple group
\begintt
{\TeX}
\endtt
whether or not the following character is a ↑{space}, because the |}| stops
\TeX\ from looking for the optional space after |\TeX|. This might come in
handy when you're using a text editor (e.g., when replacing all occurrences
of a particular word by a control sequence). Another thing you could do is
type
\begintt
\TeX{}
\endtt
using an {\sl empty\/} group for the same purpose: the |{}| here is a
group of no characters, so it produces no output, but it does have the
effect of stopping \TeX\ from skipping blanks.
↑(empty group) ↑(lbrace rbrace)
\exercise Sometimes you run into a rare word like `shelfful' that looks
better as `shelf{}ful' without the `ff' ↑{ligature}. How can you fool
\TeX\ into thinking that there aren't two consecutive f's in such a word?
\answer |{shelf}ful| or |shelf{}ful|, etc.; or even |shelf\/ful|, which
yields a shelf\/ful instead of a shelf{}ful. ↑(*/)
\dangerexercise Explain how to get three blank spaces in a row without
using `|\|\]'.↑(escape space)
\answer `\]|{|\]|}|\]' or
`\]|{}|\]|{}|\]', etc. \ (These aren't strictly equivalent
to `|\|\]|\|\]|\|\]', since they adjust the spaces
by the current ``↑{space factor},'' as explained later.)
But \TeX\ also uses grouping for another, quite different, purpose, namely
to determine how much of your text is to be governed by certain control
sequences. For example, if you want to center something on a line you can type
\begintt
\centerline{This information should be centered.}
\endtt
using the control sequence ↑{:centerline} defined in plain \TeX\ format.
Grouping is used in quite a few of \TeX's more intricate instructions; and
it's possible to have groups within groups within groups, as you can see
by glancing at Appendix@B\null. Complex grouping is generally unnecessary,
however, in ordinary manuscripts, so you needn't worry about it. Just
don't forget to finish each group that you've started, because a lost
`|}|' might cause trouble.
Here's an example of two groups, one ↑{nested} inside the other:
\begintt
\centerline{This information should be {\it centered}.}
\endtt
As you might expect, \TeX\ will produce a centered line that also contains
italics:
$$\hbox{This information should be {\it centered}.}$$
But let's look at the example more closely: `|\centerline|' appears outside
the curly braces, while `|\it|' appears inside. Why are the two cases
different? And how can a beginner learn to remember which is which?
Answer: |\centerline| is a control sequence that applies only to the very next
thing that follows, so you want to put braces around the text that is to
be centered (unless that text consists of a single symbol or control sequence).
For example, to center the \TeX\ logo on a line, it would suffice to
type `|\centerline\TeX|', but to center the phrase `\TeX\ has groups' you
need braces: `|\centerline{\TeX\ has groups}|'. On the other hand, |\it| is
a control sequence that simply means ``change the current font''; it acts
without looking ahead, so it affects {\sl everything\/} that follows, at
least potentially. The braces surround |\it| in order to confine the font
change to a local region.
In other words, the two sets of braces in this example actually have different
functions: One serves to treat several words of the text as if they
were a single object, while the other provides local block structure.
\exercise What do you think happens if you type the following:
\begintt
\centerline{This information should be {centered}.}
\centerline So should this.
\endtt
\answer In the first case, you get the same result as if the innermost
braces had not appeared at all, because you haven't used the grouping to
change fonts or to control spacing or anything. \TeX\ doesn't mind if you
want to waste your time making groups for no particular reason.
But in the second case, the necessary braces were forgotten. You get the
letter `S' centered on a line by itself, followed by a paragraph that
begins with `o should this.' on the next line.
\exercise And how about this one?
\begintt
\centerline{This information should be \it centered.}
\endtt
\answer You get the same result as if another pair of braces were present around
`|\it centered|', except that the period is typeset from the italic font. \
(Both periods look about the same.) \ The |\it| font will not remain in force
after the |\centerline|, but this is something of a coincidence: \TeX\ uses
the braces to determine what text is to be centered, but then it removes
the braces. The |\centerline| operation, as defined in Appendix@B\null, puts the
resulting braceless text inside {\sl another\/} group; and that's why
|\it| disappears after |\centerline|. \ (If you don't understand this, just
don't risk leaving out braces in tricky situations, and you'll be OK.)
\smallskip
\dangerexercise Define a control sequence |\ital| so that a user could type
`|\ital{text}|' instead of `|{\it text\/}|'. Discuss the pros and cons of
|\ital| versus |\it|.
\answer |\def\ital#1{{\it#1\/}}|. \ Pro:@Users might find this easier to
learn, because it works more like |\centerline| and they don't have to
remember to make the italic correction. \ Con:@To avoid the italic correction
just before a {\it comma} or {\it period}, users should probably be taught
another control sequence; for example, with
\begintt
\def\nocorr{\kern0pt }
\endtt
a user could type `|\ital{comma} or \ital{period\nocorr},|'. The alternative
of putting a period or comma in italics, to avoid the italic correction,
doesn't look as good. A long sequence of italics would be inefficient for
\TeX, since the entire text for the argument to |\ital| must be read into
memory only to be scanned again.
\danger Subsequent chapters describe many primitive operations of \TeX\ for
which the locality of grouping is important. For example, if one says
↑{*setbox} or ↑{*setcount} within a group, the previous contents of that
box or counter will be restored when the group ends. Sometimes, however,
you want to make a definition that transcends its current group. This
effect can be obtained by prefixing `↑{*global}' to the definition. For
example, \TeX\ keeps the current page number in |\count0|, and the routine
that outputs a page wants to increase the ↑{page number}. ↑{Output routines}
are always protected by enclosing them in groups, so that they do not
inadvertently mess up the rest of \TeX; but the change to |\count0| would
disappear if it were kept local to the output group. The command
\begintt
\global\advcount0 by 1
\endtt
↑(*advcount)
solves the problem; it increases |\count0| and makes this value stick around
at the end of the output routine. In general, |\global| makes the immediately
following definition pertain to all existing groups, not just to the
innermost one.
\ddangerexercise If you think you understand local and global definitions,
here's a little test to make sure: Suppose |\s| stands for `|\setcount1=|',
|\g| stands for `|\global\setcount1=|', and |\c| stands for
`|\showthe\count1|'. What values will be shown?
\begintt
{\s1\c\g2{\c\s3\c\g4\c\s5\c}\c\s6\c}\c
\endtt
\answer |{1 {2 3 4 5} 4 6} 4|.
\ddanger Another way to obtain block structure with \TeX\ is to use the
primitives ↑{*begingroup} and ↑{*endgroup}. These control sequences make it
easy to begin a group within one control sequence and end it within
another. The text that \TeX\ actually executes, after control sequences
have been expanded, must have properly ↑{nested groups}, i.e., groups that
don't overlap. For example,
\begintt
{ \begingroup } \endgroup
\endtt
is not legitimate.
\ddangerexercise Define control sequences |\beginthe|\<block name> and
|\endthe|\<block name> that provide a ``named'' block structure. In other
words,
$$\dbox{|\beginthe{beguine}\beginthe{waltz}\endthe{waltz}\endthe{beguine}|%
\hss}$$
should be permissible, but not
$$\dbox{|\beginthe{beguine}\beginthe{waltz}\endthe{beguine}\endthe{waltz}|%
.\hss}$$
\answer |\def\beginthe#1{\begingroup\def\blockname{#1}}|\parbreak
|\def\endthe#1{\def\test{#1}%|\parbreak
| \ifx\test\blockname\endgroup|\parbreak
| \else\errmessage{You should have said|\parbreak
| \string\endthe{\blockname}}\fi}|
\endchapter
[The Poets pen] giues to aire nothing, a locall habitation,
And a name.
\author WILLIAM ↑{SHAKESPEARE}, {\sl Midsommer Nights Dreame\/} (1595)
% Act V, Scene 1, lines 16--17
\bigskip
An encounter group is a gathering, for a few hours or a few days,
of twelve or eighteen personable, responsible, certifiably normal
and temporarily smelly people.
\author JANE ↑{HOWARD}, {\sl Please Touch\/} (1970)
\eject
\beginchapter Chapter 6. Running\\\TeX
The best way to learn how to use \TeX\ is to use it. Thus, it's high time
for you to sit down at a computer terminal and interact with the \TeX\
system, trying things out to see what happens. Here are some small but
complete examples suggested for your first encounter.
↑(Running the program)
Caution: This chapter is rather a long one. Why don't you stop reading
now, and come back to it tomorrow?
\smallskip OK, let's suppose that you're rested and excited about having a
trial run of \TeX. Step-by-step instructions for using it appear in this
chapter. First do this: Go to the lab where the graphic output device is,
since you will be wanting to see the output that you get---it won't really
be satisfactory to run \TeX\ from a remote location, where you can't hold
the generated documents in your own hands. Then log in; and start \TeX. \
(You may have to ask somebody how to do this on your local computer. Usually
the operating system prompts you for a command and you type `|TeX|' or
`|run| |tex|' or something like that.)
When you're successful, \TeX\ will welcome you with a message such as
\begintt
This is TeX, Version 1.0 (format=plain 83.7.15)
**
\endtt
The `↑{.**}' is \TeX's way of asking you for an input file name.
Now type `↑{*relax}' (including the backslash), and ↑{<return}
(or whatever is used to mean ``end-of-line'' on your terminal).
\TeX\ is all geared up for action, ready to read a long manuscript; but
you're saying that it's all right to take things easy, since this is
going to be a real simple run. In fact, |\relax| is a control sequence
that means ``do nothing.''
The machine will type another asterisk at you. This time type something
like `|Hello?|' and wait for another ↑{asterisk}. Finally type `↑{*end}',
and stand back to see what happens.
\TeX\ should respond with `↑{.[1]}' (meaning that it has finished page@1
of your output); then the program will halt, probably with some
indication that it has created a file called `|texput.dvi|'. \ (\TeX\
uses the name ↑{.texput} for its output when you haven't specified any
better name in your first line of input; and ↑{.dvi} stands for
``↑{device independent},'' since |texput.dvi| is capable of
being printed on almost any kind of typographic output device.)
Now you're going to need some help again from your friendly local
computer hackers. They will tell you how to produce hardcopy from
|texput.dvi|. And when you see the hardcopy---Oh, glorious day!---you
will see a magnificent `Hello?' and the page number `1' at the bottom.
Congratulations on your first masterpiece of fine printing.
\smallbreak
The point is, you understand now how to get something through the whole cycle.
It only remains to do the same thing with a somewhat longer document.
So our next experiment will be to work from a file instead of typing
the input online.
Use your favorite text editor to create a file called ↑{.story.tex} that
contains the following 18 lines of text (no more, no less):
$$\halign{\hbox to\the\parindent{\hfil\sevenrm#\ \ }\hfil\cr
1&|\hrule|\cr\noalign{↑(*hrule)}
2&|\vskip 1in|\cr\noalign{↑(*vskip)↑(leading, see vskip)}
3&|\centerline{\bf A SHORT STORY}|\cr\noalign{↑(:centerline)}
4&|\vskip 6pt|\cr
5&|\centerline{\sl by A. U. Thor}|\cr\noalign{↑(Thor)}
6&|\vskip .5cm|\cr
7&|Once upon a time, in a distant|\cr
8&| galaxy called \"O\"o\c c,|\cr\noalign{↑(:")↑(:c)}
9&|there lived a computer|\cr
10&|named R.@J. Drofnats.|\cr\noalign{↑(Drofnats)}
11&||\cr
12&|Mr.@Drofnats---or ``R. J.,'' as|\cr
13&|he preferred to be called---|\cr
14&|was happiest when he was at work|\cr
15&|typesetting beautiful documents.|\cr
16&|\vskip 1in|\cr
17&|\hrule|\cr
18&|\vfill\eject|\cr\noalign{↑(*vfill)↑(:eject)}}$$
(Don't type the numbers at the left of these lines, of course; they are present
only for reference.) \ This example is a bit long, and more than a bit silly;
but it's no trick for a good typist like you and it will give you some
worthwhile experience, so do it. For your own good. And think about what
you're typing, as you go; the example introduces a few important features
of \TeX\ that you can learn as you're making the file.
Here is a brief explanation of what you have just typed: Lines 1 and@17
put a horizontal ↑{rule} (a thin line) across the page. Lines 2 and@16
skip past one inch of space; `|\vskip|' means ``vertical skip,'' and this
extra space will separate the horizontal rules from the rest of the copy.
Lines 3 and@5 produce the title and the author name, centered, in boldface
and in slanted type. Lines 4 and@6 put extra white space between those
lines and their successors. \ (We shall discuss units of measure like
`|6pt|' and `|.5cm|' in Chapter@10.)
The main bulk of the story appears on lines 7--15, and it consists of
two ↑{paragraphs}. The fact that line@11 is blank informs \TeX\ that
↑(blank line) ↑(empty line)
line@10 is the end of the first paragraph; and the `|\vskip|' on line@16
implies that the second paragraph ends on line@15, because vertical
skips don't appear in paragraphs. Incidentally, this example seems
to be quite full of \TeX\ commands; but it is atypical in that respect,
because it is so short and because it is supposed to be teaching things.
Messy constructions like |\vskip| and |\centerline| can be expected at the
very beginning of a manuscript, unless you're using a canned format, but
they don't last long; most of the time you will find yourself typing
straight text, with relatively few control sequences.
And now comes the good news, if you haven't used computer typesetting
before: You don't have to worry about where to break lines in a paragraph
(i.e., where to stop at the right margin and to begin a new line), because
\TeX\ will do that for you. Your manuscript file can contain long lines or
short lines, or both; it doesn't matter. This is especially helpful when
you make changes, since you don't have to retype anything except the words
that changed. {\sl Every time you begin a new line in your manuscript file
it is essentially the same as typing a space.} When \TeX\ has read an
entire paragraph---in this case lines 7 to@11---it will try to break up
the text so that each line of output, except the last, contains about the
same amount of copy; and it will hyphenate words if necessary to keep the
spacing consistent, but only as a last resort.
Line 8 contains the strange concoction
\begintt
\"O\"o\c c
\endtt
and you already know that |\"| stands for an ↑{umlaut} accent. The
|\c| stands for a ``↑{cedilla},'' so you will get `\"O\"o\c c' as the
name of that distant galaxy.
The remaining text is simply a review of the conventions that we discussed
long ago for dashes and quotation marks; except that the `|@|' signs in
lines 10 and@12 are a new wrinkle. These are called {\sl ↑{ties}}, because
they tie words together; i.e., \TeX\ is supposed to treat `|@|' as a
normal space but not to break between lines there.
A good typist will use ties within names, as shown in our
example; further discussion of ties appears in Chapter@14. ↑(at sign)
Finally, line@18 tells \TeX\ to `↑{*vfill}', i.e., to fill the rest of
the page with white space; and to `↑{:eject}' the page, i.e., to send it
to the output file.
\smallskip Now you're ready for Experiment@2: Get \TeX\ going again.
This time when the machine says `|**|' you should answer `|story|', since
that is the name of the file where your input resides. \ (The file
could also be called by its full name `|story.tex|', but \TeX\ automatically
supplies the suffix `|.tex|' if no suffix has been specified.)
↑(file names)
You might wonder why the first prompt was `↑{.**}', while the subsequent
ones are `↑{.*}'; the reason is simply that the first thing you type to
\TeX\ is slightly different from the rest: If the first character of your
response to `|**|' is not a backslash, \TeX\ automatically inserts
`↑{*input}'. Thus you can usually run \TeX\ by merely naming your input
file. \ (Previous \TeX\ systems required you to start by typing `|\input
story|' instead of `|story|', and you can still do that; but most \TeX\
users prefer to put all of their commands into a file instead of typing
them online, so \TeX\ now spares them the nuisance of starting out with
|\input| each time.) \ Recall that in Experiment@1 you typed `|\relax|';
that started with a backslash, so |\input| was not implied.
\danger There's actually another difference between `|**|' and `|*|': If the
first character after |**| is an ↑{ampersand} (\thinspace`|&|'\thinspace),
\TeX\ will replace its memory with a precomputed ↑{format file} before
proceeding. Thus, for example, you can type `|&plain \input story|' or
even `|&plain story|' in response to `|**|', if you are running some
version of \TeX\ that might not have the plain format preloaded.
↑(preloaded formats)
\danger Incidentally, many systems allow you to invoke \TeX\ by typing a
one-liner like `|tex story|' instead of waiting for the `|**|'; similarly,
`|tex \relax|' works for Experiment@1, and `|tex &plain story|' loads the
plain format before inputting the |story| file. You might want to try
this, to see if it works on your computer, or you might ask somebody if
there's a similar shortcut.
As \TeX\ begins to read your story file, it types `|(story.tex|', possibly
with a version number for more precise identification, depending on your
local operating system. Then it types `|[1]|', meaning that page@1 is done;
and `|)|', meaning that the file has been entirely input.
\TeX\ will now prompt you with `|*|', because the file did not contain
`↑{*end}'. Enter |\end| into the computer now, and you should get a file
|story.dvi| containing a typeset version of Thor's story. As in Experiment@1,
you can proceed to convert |story.dvi| into hardcopy; go ahead and do that now.
The typeset output won't be shown here, but you can see the results by
doing the experiment personally. Please do so before reading on.
\exercise Statistics show that only 7.43 of 10 people who read this manual
actually type the |story.tex| file as recommended, but that those people
learn \TeX\ best. So why don't you join them?
\answer Laziness and/or obstinacy.
\exercise Look closely at the output of Experiment@2, and compare it to
|story.tex|\thinspace: If you followed the instructions carefully, you
will notice a typographical error. What is it, and why did it sneak in?
\answer There's an unwanted space after `called---', because (as the book
says) \TeX\ treats the end of a line as if it were a blank space. That
blank space is usually what you want, except when a line ends with a
hyphen or a dash; so you should {\sc WATCH OUT} for lines that end with
hyphens or dashes.
With Experiment 2 under your belt, you know how to make a document from a
file. The remaining experiments in this chapter are intended to help you
cope with the inevitable anomalies that you will run into later; we will
intentionally do things that will cause \TeX\ to ``squeak.''
But before going on, it's best to fix the error revealed by the previous
output (see exercise 6.2): Line@13 of the |story.tex| file should be changed to
\begintt
he preferred to be called---% error has been fixed!
\endtt
The `|%|' sign here ↑(percent) is a feature of plain \TeX\ that we haven't
discussed before: It effectively terminates a line of your input file,
without introducing the blank space that \TeX\ ordinarily inserts when
moving to the next line of input. Furthermore, \TeX\ ignores everything
that you type following a |%|, up to the end of that line in the file;
you can therefore put ↑{comments} into your manuscript, knowing that the
comments are for your eyes only.
Experiment 3 will be to make \TeX\ work harder, by asking it to set the
story in narrower and narrower columns. Here's how: After starting the
program, type
\begintt
\hsize=4in \input story
\endtt
in response to the `|**|'. This means, ``Set the story in a 4-inch column.''
More precisely, ↑{*hsize} is a primitive of \TeX\ that specifies the horizontal
size, i.e., the width of each line in the output when a paragraph is being
typeset; and ↑{*input} is a primitive that causes \TeX\ to read the specified
file. Thus, you are instructing the machine to change the normal setting of
|\hsize| that was defined by plain \TeX, and then to process |story.tex|
under this modification.
\TeX\ should respond by typing something like `|(story.tex [1])|' as
before, followed by `|*|'. Now you should type
\begintt
\hsize=3in \input story
\endtt
and, after \TeX\ says `|(story.tex [2])|' asking for more, type three more lines
\begintt
\hsize=2.5in \input story
\hsize=2in \input story
\end
\endtt
to complete this four-page experiment.
Don't be alarmed when \TeX\ screams `|Overfull| |\hbox|' several times as
it works at the 2-inch size; that's what was supposed to go wrong during
Experiment@3. There simply is no good way to break the given
paragraphs into lines that are exactly two inches wide, without making
the spaces between words come out too large or too small. Plain \TeX\
has been set up to ensure rather strict tolerances on all of the lines it
produces:
$$\displayvbox{
\hbox expand-1em{you don't get spaces between words
narrower than this,\ and}
\hbox expand+1.679895em{you don't get spaces between words wider
than this.}}$$
If there's no way to meet these restrictions, you get an ↑{overfull box}.
And with the overfull box you also get (1)@a warning message, printed
on your terminal, and (2)@a big black bar inserted at the right of the
offending box, in your output. \ (Look at page@4 of the output from
Experiment@3; the overfull boxes should stick out like sore thumbs.
On the other hand, pages 1--3 should be perfect.)
Of course you don't want overfull boxes in your output, so \TeX\ provides
several ways to remove them; that will be the subject of our Experiment@4.
But first let's look more closely at the results of Experiment@3, since
\TeX\ reported some potentially valuable information when it was forced
to make those boxes too full; you should learn how to read this data:
\begintt
Overfull \hbox (1.02238pt too wide) in paragraph at lines 7--11
\tenrm tant galaxy called []O↑↑So↑↑Xc, there lived|vrt
Overfull \hbox (0.50156pt too wide) in paragraph at lines 7--11
\tenrm a com-puter named R. J. Drof-nats. |vrt
Overfull \hbox (5.42514pt too wide) in paragraph at lines 12--16
\tenrm he pre-ferred to be called---was hap-|vrt
\endtt
Each overfull box is correlated with its location in your input file
(e.g., the first two were generated when processing the paragraph on
lines 7--11 of |story.tex|), and you also learn by how much the copy
sticks out (e.g., 1.02238 points).
Notice that \TeX\ also shows the contents of the overfull boxes in
abbreviated form. For example, the last one has the words `he preferred
to be called---was hap-', set in font |\tenrm| (10-point roman type);
the first one has a somewhat curious rendering of `\"O\"o\c c', because the
accents appear in strange places within that font. In general, when you
see `↑{.[]}' in one of these messages, it stands either for
the paragraph indentation or for some sort of complex construction;
in this particular case it stands for an umlaut that has been raised
up to cover an `O'.
\dangerexercise Can you explain the `\vrt' that appears after
`|lived|' in that message?
\answer It represents the heavy bar that shows up in
your output. \ (This bar wouldn't be present if\/ ↑{*overfullrule} had been
set to zero, nor is it present in an underfull box.)
\ddangerexercise Why is there a space before the `\vrt' in `|Drof-nats.
|\vrt'\thinspace?
\answer This is the ↑{*parfillskip} space that ends the paragraph.
In plain \TeX\ the parfillskip is zero when the last line of the paragraph
is full; hence no space actually appears before the rule in the output
of Experiment@3. But all hskips show up as spaces in an overfull box
message, even if they're zero.
You don't have to take out pencil and paper in order to write down the
overfull box messages that you get before they disappear from view, since
\TeX\ always writes a ``↑{transcript}'' or ``↑{log file}'' that records what
happened during each session. For example, you should now have a file
called |story.log| containing the transcript of Experiment@3, as well
as a file called |texput.log| containing the transcript of Experiment@1. \
(The transcript of Experiment@2 was probably overwritten when you did
number@3.) \ Take a look at |story.log| now; you will see that the overfull
box messages are accompanied not only by the abbreviated box contents,
but also by some strange-looking data about hboxes and glue and kerns and
such things. This data gives a precise description of what's in that
overfull box; \TeX\ wizards will find such
listings important, if they are called upon to diagnose some mysterious
error, and you too may want to understand \TeX's internal code some day.
The abbreviated forms of overfull boxes show the hyphenations that
\TeX\ tried before it resorted to overfilling. The ↑{hyphenation} algorithm,
which is described in Appendix@H\null, is excellent but not perfect; for
example, you can see from the messages in |story.log| that \TeX\ finds the
hyphen in `pre-ferred', and it can even hyphenate `Drof-nats'. Yet it
discovers no hyphen in `galaxy', and every once in
a@while an overfull box problem can be cured simply by giving \TeX\ a hint
about how to hyphenate some word more completely. \ (We will see later that
there are two ways to do this, either by inserting ↑{discretionary hyphens}
each time as in `\hbox{|gal\-axy|}', or by saying
`\hbox{|\hyphenation{gal-axy}|}' once at the beginning of your manuscript.)
In the present example, hyphenation is not a problem, since \TeX\ found
and tried all the hyphens that could possibly have helped. The only way to
get rid of the overfull boxes is to change the tolerance, i.e., to allow
wider spaces between words. Indeed, the tolerance that plain \TeX\ uses
for wide lines is completely inappropriate for 2-inch columns; such narrow
columns simply can't be achieved without loosening the constraints, unless
you rewrite the copy to fit.
\TeX\ assigns a numerical value called ``↑{badness}'' to each line that
it sets, in order to assess the quality of the spacing. The exact rules
for badness are different for different fonts, and they will be discussed
in Chapter@14; but here is the way badness works for the roman font
of plain \TeX:
$$\displayvbox{\hbadness10000
\halign{#\hfil&\hskip3em#\hfil\cr
\hbox expand-.666667em{The badness of this line is 100.}&(very tight)\cr
\hbox expand-.333333em{The badness of this line is 12.}&
(somewhat tight)\cr
\hbox{The badness of this line is 0.}&(perfect)\cr
\hbox expand.5em{The badness of this line is 12.}&(somewhat loose)\cr
%\hbox expand 1em{The badness of this line is 100.}&(loose)\cr % then "looser"
\hbox expand 1.259921em{The badness of this line is 200.}&(loose)\cr
%\hbox expand 1.713em{The badness of this line is 500.}\cr
\hbox expand 2.155em{The badness of this line is 1000.}&(bad)\cr
\hbox expand 3.684em{The badness of this line is 5000.}& % actually 4995!
(awful)\cr}}$$
Plain \TeX\ normally stipulates that no line's badness should exceed 200;
but in our case, the task would be impossible since
$$\displayvbox{\hbadness 10000
\hbox{`\hbox to 2in{tant galaxy called \"O\"o\c c, there}'\hskip 3em
has badness 1515;}
\hbox{`\hbox to 2in{he preferred to be called---was}'\hskip 3em
has badness 552.}}$$
So we turn now to Experiment@4, in which spacing variations that are
more appropriate to narrow columns will be used.
Run \TeX\ again, and begin this time by saying
\begintt
\hsize=2in \tolerance=1600 \input story
\endtt
so that lines with badness up to 1600 will be tolerated. Hurray! there are
↑(*tolerance)
no overfull boxes this time. \ (But you do get a message about an {\sl
underfull\/} box, since \TeX\ reports all boxes whose badness exceeds
a certain threshold called ↑{*hbadness}; plain \TeX\ sets |\hbadness=1000|.) \
↑(underfull box)
Now make \TeX\ work still harder by trying
\begintt
\hsize=1.5in \input story
\endtt
(thus leaving the tolerance at 1600 but making the ↑{column width} still
↑(measure, see hsize)
skimpier). Alas, overfull boxes return; so try typing
\begintt
\tolerance=10000 \input story
\endtt
in order to see what happens. \TeX\ treats 10000 as if it were ``infinite''
tolerance, allowing arbitrarily wide space; thus, a tolerance of 10000 will
{\sl never\/} produce an overfull box, unless something strange occurs like
an unhyphenatable word that is wider than the column itself.
Two boxes are reported underfull at the 1.5-inch setting; with such narrow
limits, occasional wide space is unavoidable. But try
\begintt
\raggedright \input story
\endtt
for a change. \ ↑(:raggedright)(This tells \TeX\ not to worry about keeping
the right margin straight, and to keep the spacing uniform within each line.) \
Finally, type
\begintt
\hsize=.75in \input story
\endtt
followed by `|\end|', to complete Experiment 4. This makes the columns
almost impossibly narrow.
\danger The output from this experiment will give you some feeling for the
problem of ↑{breaking a paragraph} into approximately equal lines. When the
lines are relatively wide, \TeX\ will almost always find a good solution.
But otherwise you will have to figure out some compromise, and several
options are possible. Suppose you want to ensure that no lines have
badness exceeding@500. Then you could set |\tolerance| to some high
number, and |\hbadness=500|; \TeX\ would not produce overfull boxes, but
it would warn you about the underfull ones. Or you could set
|\tolerance=500|; then \TeX\ might produce overfull boxes. If you really
want to take corrective action, the second alternative is better, because
you can look at an overfull box to see how much sticks out; it becomes
graphically clear what remedies are possible. On the other hand, if you
don't have time to fix bad spacing---if you just want to know how bad it
is---then the first alternative is better, although it may require more
computer time.
\dangerexercise When |\raggedright| has been specified, badness reflects
the amount of space at the right margin, instead of the spacing between
words. Devise an experiment by which you can easily determine what
badness \TeX\ assigns to each line, when the |story| is set ragged-right
in 1.5-inch columns.
\answer Run \TeX\ with \hbox{|\hsize=1.5in|} \hbox{|\tolerance=10000|}
\hbox{|\raggedright|} \hbox{|\hbadness=-1|} and then |\input story|. \TeX\ will
report the badness of all lines (except the final lines of paragraphs, where
fill glue makes the badness zero).
\danger A parameter called ↑{*hfuzz} allows you to ignore boxes that are only
slightly overfull. For example, if you say |\hfuzz=1pt|, a box must stick
out more than one point before it is considered erroneous. Plain \TeX\
sets |\hfuzz=0.1pt|.
\dangerexercise Inspection of the output from Experiment@4, especially
page@3, shows that with narrow columns it would be better to allow white
space to appear before and after a dash, whenever other spaces in the
same line are being stretched. Define a ↑{:dash} macro that does this.
\answer |\def\extraspace{{\nobreak \hskip 0pt plus .15em}}|\parbreak
|\def\dash{\unskip\extraspace---\extraspace}|\par\nobreak\smallskip\noindent
(If you try this with the story at 2-inch and 1.5-inch sizes, you will
notice a substantial improvement. The |\unskip| allows people to leave a
space before typing |\dash|. \TeX\ will try to hyphenate before |\dash|,
but not before `|---|'; cf.\ Appendix@H\null. The extra braces in
|\extraspace| are a precaution in case the next word is `|minus|'.)
You were warned that this is a long chapter. But take heart: there's only
one more experiment to do, and then you will know enough about \TeX\ to
run it fearlessly by yourself. The only thing you are still missing is some
information about how to cope with error messages, i.e., not just with
warnings about things like overfull boxes, but with cases where \TeX\
actually stops and asks you what to do next.
↑{Error messages} can be terrifying when you aren't prepared for them;
but they can be fun when you have the right attitude. Just remember that
you really haven't hurt the computer's feelings, and that nobody will
hold the errors against you. Then you'll find that running \TeX\ might
actually be a creative experience instead of something to dread.
The first step in Experiment 5 is to plant two intentional mistakes in the
|story.tex| file. Change line@3 to
\begintt
\centerline{\bf A SHORT \ERROR STORY}
\endtt
and change `|\vskip|' to `|\vship|' on line@2.
Now run \TeX\ again; but instead of `|story|' type `|sorry|'. The computer
should respond by saying that it can't find file |sorry.tex|, and it will
ask you to try again. Just hit \<return> this time; you'll see
that you had better give the name of a real file. So type `|story|' and
wait for \TeX\ to find one of the {\sl faux pas\/} in that file.
Ah yes, the machine will soon stop\footnote*{Some installations of \TeX\ do
not allow interaction. In such cases all you can do is look at the error
messages in your log file, where they will appear together with the ``help''
information.}, after typing something like this:
\begintt
! Undefined control sequence.
l.2 \vship
1in
?
\endtt
\TeX\ begins its ↑{error messages} with `|!|', and it shows what it was
reading at the time of the error by displaying two lines of context. The
top line of the pair (in this case `|\vship|'\thinspace) shows what \TeX\
has looked at so far, and where it came from (`|l.2|', i.e., line number@2);
the bottom line (in this case `|1in|'\thinspace) shows what \TeX\ has yet
to read.
The `↑{.?}' that appears after the context display means that \TeX\ wants
advice about what to do next. If you've never seen an error message before,
or if you've forgotten what sort of response is expected, you can type
`|?|' now (go ahead and try it!); \TeX\ will respond as follows:
\begintt
Type <return> to proceed, S to scroll future error messages,
R to run without stopping, Q to run quietly,
I to insert something, E to edit your file,
1 or ... or 9 to ignore the next 1 to 9 tokens of input,
H for help, X to quit.
\endtt
This is your menu of options. You may choose to continue in various ways:
\smallskip\item{1.}
Simply type \<return>. \TeX\ will resume its processing, after
attempting to recover from the error as best it can.
\smallbreak\item{2.} Type `|S|'. \TeX\ will proceed without
pausing for instructions if further errors arise. Subsequent error messages
will flash by on your terminal, possibly faster than you can read them, and
they will appear in your log file where you can scrutinize them at your
leisure. Thus, `|S|' is sort of like typing \<return> to every message.
\smallbreak\item{3.} Type `|R|'. This is like `|S|' but even stronger,
since it tells \TeX\ not to stop for any reason, not even if a file name
can't be found.
\smallbreak\item{4.} Type `|Q|'. This is like `|R|' but even more so,
since it tells \TeX\ not only to proceed without stopping but also to
suppress all further output to your terminal. It is a fast, but somewhat
reckless, way to proceed (intended for running \TeX\ with no operator in
attendance).
\smallbreak\item{5.} Type `|I|', followed by some text that you want to
insert. \TeX\ will read this line of text before encountering what it
would ordinarily see next. Lines inserted in this way are not assumed to
end with a blank space. ↑(inserting text online)
↑(online interaction, see interaction) ↑(interacting with TeX)
\smallbreak\item{6.} Type a small number. \TeX\ will delete this many
characters and control sequences from whatever it is about to read next,
and it will pause again to give you another chance to look things over.
↑(deleting tokens)
\smallbreak\item{7.} Type `|H|'. This is what you should do now and whenever
you are faced with an error message that you haven't seen for a@while. \TeX\
has two messages built in for each perceived error: a formal one and an
informal one. The formal message is printed first (e.g., `|! Undefined
control sequence.|'\thinspace); the informal one is printed if you request
more help by typing `|H|', and it also appears in your log file if you
are scrolling error messages. The informal message tries to complement the
formal one by explaining what \TeX\ thinks the trouble is, and often
by suggesting a strategy for recouping your losses.↑(help messages)
\smallbreak\item{8.} Type `|X|'. This causes \TeX\ to stop working on your
job, after putting the finishing touches on your |log| file and on any
pages that have already been output to your |dvi| file.
\smallbreak\item{9.} Type `|E|'. This is like `|X|', but it also prepares
the computer to edit the file that \TeX\ is currently reading, at the
current position, so that you can conveniently make a change before
trying again.
\smallbreak\noindent
After you type `|H|' (or `|h|', which also works), you'll get a message
that tries to explain that the control sequence just read by \TeX\
(i.e., |\vship|) has never been assigned a meaning, and that you should
either insert the correct control sequence or you should go on as if the
offending one had not appeared.
In this case, therefore, your best bet is to type
\begintt
I\vskip
\endtt
(and \<return>), with no space after the `|I|'; this effectively replaces
|\vship| by |\vskip|. \ (Do it.)
If you had simply typed \<return> instead of
inserting anything, \TeX\ would have gone ahead and read `|1in|', which
it would have regarded as part of a paragraph to be typeset. Alternatively,
you could have typed `|3|'\thinspace; that would have deleted
`|1in|' from \TeX's input. Or you could have typed `|X|' or `|E|' in
order to correct the spelling error in your file. But it's usually
best to try to detect as many errors as you can, each time you run \TeX,
since that increases your productivity while decreasing your computer bills.
Chapter@27 explains more about the art of steering \TeX\ through
troubled text.
\dangerexercise What would have happened if you had typed `|5|' after
the |\vship| error?
\answer \TeX\ would have deleted five tokens: |1|, |i|, |n|, \],
|\centerline|. (The space was at the end of line@2, the |\centerline| at the
beginning of line@3.)
\danger You can control the level of interaction by giving commands
in your file as well as online: The \TeX\ primitives ↑{*scrollmode},
↑{*nonstopmode}, and ↑{*batchmode} correspond respectively to typing
`|S|', `|R|', or `|Q|' in response to an error message, and
↑{*errorstopmode} puts you back into the normal level of interaction. \
(Such changes are global, whether or not they appear inside a group.) \
Furthermore, many installations have implemented a way to ↑{interrupt}
\TeX\ while it is running; such an interruption causes the program to
revert to |\errorstopmode|, after which it pauses and waits for
further instructions.
What happens next in Experiment 5? \TeX\ will hiccup on the other bug that
we planted in the file. This time, however, the error message is more
elaborate, since the context appears on six lines instead of two:
\begintt
! Undefined control sequence.
<argument> \bf A SHORT \ERROR
STORY
\centerline #1->\line {\hss #1
\hss }
l.3 \centerline{\bf A SHORT \ERROR STORY}
|null
?
\endtt
You get multiline error messages like this when the error is detected
while \TeX\ is processing some higher-level commands---in this case,
while it is trying to carry out |\centerline|, which is not a primitive
operation (it is defined in plain \TeX). At first, such error
messages will appear to be complete nonsense to you, because much of what
you see is low-level \TeX\ code that you never wrote. But you can overcome
this hangup by getting a feeling for the way \TeX\ operates.
First notice that the context information always appears in pairs of lines.
As before, the top line shows what \TeX\ has just read (\thinspace
`|\bf A SHORT \ERROR|'\thinspace), then comes what it is about to read
(\thinspace`|STORY|'\thinspace). The next pair of lines shows the context
of the first two; it indicates what \TeX\ was doing just before it began to
read the others. In this case, we see that \TeX\ has just read `|#1|', which
is a special code that tells the machine to ``read the first ↑{argument} that
is governed by the current control sequence''; i.e., ``now read the stuff that
↑{:centerline} is supposed to center on a line.'' The definition in Appendix@B
says that |\centerline|, when applied to some text, is supposed to be carried
out by sticking that text in place of the `|#1|' in `|\line{\hss#1\hss}|'.
So \TeX\ is in the midst of this expansion of\/ |\centerline|, as well as being
in the midst of the text that is to be centered.
The bottom line shows how far \TeX\ has gotten until now in the |story| file.
\ (Actually the bottom line is blank in this example; what appears to be
the bottom line is really the first of two lines of context, and it
indicates that \TeX\ has read everything including the `|}|' in line@3 of
the file.) \ Thus, the context in this error message gives us a glimpse of
how \TeX\ went about its business. First, it saw |\centerline| at the
beginning of line@3. Then it looked at the definition of\/ |\centerline| and
noticed that |\centerline| takes an ``argument,'' i.e., that it applies to
the next character or control sequence or group that follows. So \TeX\
read@on, and filed `|\bf A SHORT \ERROR STORY|' away as the argument to
|\centerline|. Then it began to read the expansion, as defined in
Appendix@B\null. When it reached the |#1|, it began to read the
argument it had saved. And when it reached |\ERROR|, it complained about
an undefined control sequence.
\dangerexercise Why didn't \TeX\ complain about |\ERROR| being undefined
when |\ERROR| was first encountered, i.e., before reading `|STORY}|' on line@3?
\answer A control sequence like |\centerline| might well define a control
sequence like |\ERROR| before telling \TeX\ to look at |#1|. Therefore
\TeX\ doesn't interpret control sequences when it scans an argument.
When you get a multiline error message like this, the best clues about the
source of the trouble are usually on the bottom line (since that is what
you typed) and on the top line (since that is what triggered the error
message). Somewhere in there you can usually spot the problem.
Where should you go from here? If you type `|H|' now, you'll just get
the same help message about undefined control sequences that you saw
before. If you respond by typing \<return>, \TeX\ will go
on and finish the run, producing output virtually identical to that in
Experiment@2. In other words, the conventional responses won't teach you
anything new. So type `|E|' now; this terminates the run and prepares
the way for you to fix the erroneous file. \ (On some systems, \TeX\ will
actually start up the standard text editor, and you'll be positioned at
the right place to delete `|\ERROR|'. On other systems, \TeX\ will simply
tell you to edit line@3 of file |story.tex|.) ↑(editing)
When you edit |story.tex| again, you'll notice that line@2 still contains
|\vship|; the fact that you told \TeX\ to insert |\vskip| doesn't mean
that your file has changed in any way. In general, you should correct all
errors in the input file that were spotted by \TeX\ during a run; the
log file provides a handy way to remember what those errors were.
\smallskip
Well, this has indeed been a long chapter, so let's summarize what has
been accomplished. By doing the five experiments you have learned at first
hand (1)@how to get a job printed via \TeX; (2)@how to make a file that
contains a complete \TeX\ manuscript; (3)@how to change the plain \TeX\
format to achieve columns with different widths; and (4)@how to avoid
panic when \TeX\ issues stern@warnings.
So you could now stop reading this book and go on to print a bunch of
documents. It is better, however, to continue bearing with the author
(after perhaps taking another rest), since you're just at the threshold
of being able to do a lot more. And you ought to read Chapter@7
at least, because it warns you about certain symbols that you must not
type unless you want \TeX\ to do something special. While reading the
remaining chapters it will, of course, be best for you to continue making
trial runs, using experiments of your own design.
\endchapter
What we have to learn to do we learn by doing.
\author ↑{ARISTOTLE}, {\sl Ethica Nicomachea\/} II (c.@325 B.C.)
\bigskip
He that runs may read.
\author WILLIAM ↑{COWPER}, {\sl Tirocinium\/} (1785)
\eject
\beginchapter Chapter 7. How \TeX\ Reads\\What You Type
We observed in the previous chapter that an input manuscript is expressed
in terms of ``lines,'' but that these lines of input are essentially
independent of the lines of output that will appear on the finished pages.
Thus you can stop typing a line of input at any place that's convenient for
you, as you prepare or edit a file. A few other related rules have also
been mentioned:
\medskip
\item\bull A $\langle\hbox{return}\rangle$ is like a space.
\smallskip
\item\bull Two spaces in a row count as one space.
\smallskip
\item\bull A blank line denotes the end of a paragraph.
\medskip
\noindent Strictly speaking, these rules are contradictory: A blank line
is obtained by typing $\langle\hbox{return}\rangle$ twice in a row,
and this is different from typing two spaces in a row. Some day you might want
to know the {\sl real\/} rules. In this chapter and the next, we shall study
the very first stage in the transition from input to output.
\smallskip
In the first place, it's wise to have a precise idea of what your keyboard
sends to the machine. There are 128 characters that \TeX\ might encounter at
each step, in a file or in a line of text typed directly on your terminal. These
128@characters are classified into 16 categories numbered 0 to 15:
$$\halign{\indent\hfil#&&\qquad#\hfil\cr
\hfil\hidewidth\it Category\hidewidth&\it \qquad Meaning\hidewidth\cr
\noalign{\smallskip}
0&Escape character&(|\| in this manual)\cr
1&Beginning of group&(|{| in this manual)\cr
2&End of group&(|}| in this manual)\cr
3&Math shift&(|$| in this manual)\cr
4&Alignment tab&(|&| in this manual)\cr
5&End of line&(\<return> in this manual)\cr
6&Parameter&(|#| in this manual)\cr
7&Superscript&(|↑| in this manual)\cr
8&Subscript&(|_| in this manual)\cr
9&Ignored character&(\<null> in this manual)\cr
10&Space&(\] in this manual)\cr
11&Letter&(|A|, \dots, |Z| and |a|, \dots, |z|)\cr
12&Other character&(none of the above or below)\cr
13&Active character&(|@| in this manual)\cr
14&Comment character&(|%| in this manual)\cr
15&Invalid character&(\<delete> in this manual)\cr}$$
↑(escape character)
↑(begin-group character)
↑(end-group character)
↑(math mode character)
↑(alignment tab)
↑(parameter)
↑(superscript)
↑(subscript)
↑(ignored character)
↑(space)
↑(letter)
↑(other character)
↑(active character)
↑(comment character)
↑(invalid character)
↑(category codes, table)
It's not necessary for you to learn these code numbers; the point is only that
\TeX\ responds to 16@different types of characters. At first this manual led
you to believe that there were just two types---the escape character and the
others---and then you were told about two more types, the grouping
symbols |{| and |}|. In Chapter@6 you learned two more: |@| and |%|.
Now you know that there are really@16. This is the whole truth of the
matter; no more types remain to be revealed. The category code for any
character can be changed at any time, but it is usually wise to stick to a
↑(reserved character) ↑(special character table) ↑(<null) ↑(<delete)
particular scheme.
The main thing to bear in mind is that each \TeX\ format reserves certain
characters for its own special purposes. For example, when you are using plain
\TeX\ format (Appendix@B\null), you need to know that the ten characters
\begintt
\ { } $ & # ↑ _ % @
\endtt
cannot be used in the ordinary way when you are typing;
↑(special characters)
↑(backslash)↑(left brace)↑(right brace)↑(dollar sign)↑(ampersand)
↑(hash mark)↑(caret)↑(underline)↑(percent)↑(at sign)
↑(single-character control sequences)
each of them will cause \TeX\ to do something special, as explained elsewhere
in this manual. If you really need these symbols as part of your manuscript,
plain \TeX\ makes it possible for you to type
$$\halign{\indent#\hfil&\qquad#\hfil\cr
|\$| for \$,& |\%| for \%,\cr
|\&| for \&,& |\@| for \@,\cr
|\#| for \#,& |\_| for \_\thinspace;\cr}$$
the |\_| symbol is useful for {\it compound\_identifiers\/} in computer
↑(identifiers) ↑(computer programs)
programs. In mathematics formulas you can use |\{| and |\}| for $\{$ and
$\}$, while ↑{:backslash} produces a ↑{reverse slash}; for example,
$$\displaybox{`|$\{a \backslash b\}$|'\quad yields\quad
`$\{a\backslash b\}$'.}$$
Furthermore |\↑| produces a circumflex accent (e.g., `|\↑e|' yields
`\↑e'\thinspace).
\exercise What horrible errors appear in the following sentence?
\begintt
Procter & Gamble's stock climbed to $2, a 10% gain.
\endtt
\answer Three forbidden characters were used. One should type
\begintt
Procter \& Gamble's ... \$2, a 10\% gain.
\endtt
(Also the facts are wrong.)
\exercise Can you imagine why the designer of plain \TeX\ decided not
to make `|\\|' the control sequence for reverse slashes?↑(backslash)
\answer Reverse slashes (backslashes) are fairly uncommon in formulas or
text, and |\\| is very easy to type; it was therefore felt best not to
reserve |\\| for such limited use. Typists can define |\\| to be whatever
they want (including |\backslash|).
\danger When \TeX\ reads a line of text from a file, or a line of text that
you entered directly on your terminal, it converts that text into a list of
``↑{tokens}.'' A token is either (a)@a single character with an attached
category code, or (b)@a control sequence. For example, if the normal
conventions of plain \TeX\ are in force, the text `|{\hskip 36 pt}|' is
converted into a list of eight tokens:
$$\dbox{|{|$_1$\quad\cstok{hskip}\quad|3|$_{12}$\quad|6|$_{12}$\quad
\]$_{10}$\quad|p|$_{11}$\quad|t|$_{11}$\quad|}|$_{2}$\hss}$$
The subscripts here are the category codes, as listed earlier: 1 for
``beginning of group,'' 12 for ``other character,'' and so on. The
\cstok{hskip} doesn't get a subscript, because it represents a control
sequence token instead of a character token. Notice that the space after
|\hskip| does not get into the token list, because it follows a
↑{control word}.
\danger It is important to understand the idea of token lists, if you want
to gain a thorough understanding of \TeX, and it is convenient to learn
the concept by thinking of \TeX\ as if it were a living organism. The
individual lines of input in your files are seen only by \TeX's ``eyes''
and ``mouth''; but after that text has been gobbled up, it is sent to
\TeX's ``stomach'' in the form of a token list, and the digestive processes
that do the actual typesetting are based entirely on tokens. As far as the
stomach is concerned, the input flows in as a stream of tokens, somewhat
as if your \TeX\ manuscript had been typed all on one extremely long line.
\danger You should remember two chief things about \TeX's tokens: (1)@A
control sequence is considered to be a single object that is no longer
composed of a sequence of symbols. Therefore long control sequence names
are no harder for \TeX\ to deal with than short ones, once they have been
converted to tokens. Furthermore, spaces are not ignored after control
sequences inside a token list; the ignore-space rule applies only in an
input file, during the time that strings of characters are being
tokenized. (2)@Once a category code has been attached to a character
token, the attachment is permanent. For example, if character `|{|' were
suddenly declared to be of category@12 instead of category@1, the
characters `|{|$_1$' already inside token lists of \TeX\ would still
remain of category 1; only newly-made lists would contain `|{|$_{12}$'
tokens. In other words, individual characters receive a fixed
interpretation as soon as they have been read from a file, based on the
category they have at the time of reading. Control sequences are
different, since they can change their interpretation at any time. \TeX's
digestive processes always know exactly what a character token signifies,
because the category code appears in the token itself; but when the
digestive processes encounter a control sequence token, they must look up
the current definition of that control sequence in order to figure out
what it means.
\dangerexercise Some of the category codes 0 to 15 will never appear as
subscripts in character tokens, because they disappear in \TeX's mouth.
For example, characters of category 0 (escapes) never get to be tokens.
Which categories can actually reach \TeX's stomach?
\answer 1, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13. ↑{Active characters} (type 13)
are somewhat special; they behave like control sequences in most cases
(e.g., when you say `↑{*let}|\x=@|' or `↑{*if}|\x@|'), but they behave like
character tokens when they appear in the token list of ↑{*uppercase}
or ↑{*lowercase}.
\ddanger There's a program called ↑{.INITEX} that is used to install
\TeX, starting from scratch; |INITEX| is like \TeX\ except that it can
do even more things. It can compress ↑{hyphenation} patterns into special
tables that facilitate rapid hyphenation, and it can
produce ↑{format} files like `|plain.fmt|' from `|plain.tex|'.
But |INITEX| needs extra space to carry out such tasks, so it generally
has less memory available for typesetting than you would expect to find in a
production version of \TeX.
\ddanger When |INITEX| begins, it knows nothing
but \TeX's primitives. All 128@characters are initially of category@12,
except that ↑{<return} has category@5,
↑{<space} has category@10, ↑{<null} has category@9, ↑{<delete} has category@15,
the 52 letters |A|$\,\ldots\,$|Z| and |a|$\,\ldots\,$|z| have category@11,
and ↑{backslash} has category@0.
It follows that |INITEX| is initially incapable of carrying out some of
\TeX's primitives that depend on grouping; you can't use |\def| or |\hbox|
until there are characters of categories 1 and@2.
Appendix@B begins with ↑{*catcode} commands to provide characters of the
necessary categories; e.g.,
\begintt
\catcode`\{=1
\endtt
assigns category 1 to the |{| symbol. The |\catcode| operation is like
many other primitives of \TeX\ that we shall study later; by modifying
internal codes like the category codes, you can adapt \TeX\ to a wide
variety of applications.
\ddangerexercise Suppose that the commands
\begintt
\catcode`\<=1 \catcode`\>=2
\endtt
appear near the beginning of a group that begins with `|{|'; these
specifications instruct \TeX\ to treat |<| and |>| as group delimiters.
According to \TeX's rules of locality, the characters |<| and |>| will
revert to their previous categories when the ↑{group} ends. But should the
group end with |}| or@with@|>|\thinspace?
\answer It ends with either |>| or |}| or any character of category 2;
then the effects of all |\catcode| definitions within the group are wiped
out, except those that were ↑{*global}. \TeX\ doesn't have any built-in
knowledge about how to pair up particular kinds of grouping characters.
New category codes take effect as soon as a |\catcode| assignment has been
digested. For example,
\begintt
{\catcode`\>=2 >
\endtt
is a complete group. But without the space after `|2|' it would not be
complete, since \TeX\ would have read the@`|>|' and converted it to a
token before knowing what category code was being specified; \TeX\ always
reads the token following a constant before evaluating that constant.
\ddanger Although control sequences are treated as single objects,
\TeX\ does provide a way to break them into lists of character tokens:
If you write ↑{*string}|\cs|,
where |\cs| is any control sequence, you get the list of characters for that
control sequence's name. For example, |\string\TeX| produces four tokens:
|\|$_{12}$, |T|$_{12}$, |e|$_{12}$, |X|$_{12}$. Each character in this token
list automatically gets category code@12 (``other''),
including the ↑{backslash} that |\string| always inserts to represent an escape
character. However, category@10 will be assigned to the character `\]'
(blank ↑{space}) if a space character somehow sneaks into the name of a
control sequence.
\ddanger Conversely, you can go from a list of character tokens to a
control sequence by saying `↑{*csname}\<tokens>↑{*endcsname}'. The tokens
that appear in this construction between |\csname| and |\endcsname| may
include other control sequences, as long as those control sequences
ultimately expand into characters instead of \TeX\ primitives; the final
characters can be of any category, not necessarily letters. For example,
`|\csname TeX\endcsname|' is essentially the same as `|\TeX|'; but
`|\csname\TeX\endcsname|' is illegal, because |\TeX| expands into tokens
containing the ↑{*kern} primitive. Furthermore,
`|\csname\string\TeX\endcsname|' will produce the unusual control sequence
`|\\TeX|', i.e., the token \cstok{\char`\\TeX}, which you can't ordinarily
write.
\ddangerexercise Experiment with \TeX\ to see what |\string| does when it
is followed by an ↑{active character} like |@|. \ (Active characters behave
like control sequences, but they are not prefixed by an escape.) \ What
is an easy way to conduct such experiments online? What control sequence
could you put after |\string| to@obtain the single character
token@|\|$_{12}$?
\answer If you type `|\message{\string@}|' and `|\message{\string\@}|', \TeX\
responds with `|@|' and `|\@|', respectively. ↑(*message)
To get |\|$_{12}$ from |\string| you therefore need to make backslash an
active character. One way to do this is
\begintt
{\catcode`/=0 \catcode`\\=13 /message{/string\}}
\endtt
(The ``↑{null control sequence}'' that you get when there are no
tokens between |\csname| and |\endcsname| is not a solution to this exercise,
because |\string| converts it to `|\csname\endcsname|'.)
\ddangerexercise What tokens does
`|\expandafter\string\csname a\string\   b\endcsname|' produce?
(There are three spaces before the |b|. Chapter@20 explains ↑{*expandafter}.)
\answer |\|$_{12}$ |a|$_{12}$ |\|$_{12}$ \]$_{10}$ |b|$_{12}$.
\ddangerexercise When |\csname| is used to define a control sequence for
the first time, that control sequence is made equivalent to |\relax|
until it is redefined. Use this fact to design a macro |\ifundefined#1|
such that, for example,
$$\displaybox{|\ifundefined{TeX}|\<true text>|\else|\<false text>|\fi|}$$
expands to the \<true text> if\/ |\TeX| hasn't previously been defined,
or if |\TeX| has been |\let| equal to |\relax|; it should expand
to the \<false text> otherwise. ↑(:ifundefined)
\answer |\def\ifundefined#1{\expandafter\ifx\csname#1\endcsname\relax}|%
\hfil\break Note that a control sequence like this must be used with care;
it cannot be included in ↑{conditional} text, because the |\ifx| will not
be seen when |\ifundefined| isn't expanded.
\ddanger \TeX\ has two other operations that produce tokens something like
|\string| does. If you write ↑{*number}\<number>, you get the decimal
equivalent of the \<number>; and if you write ↑{*romannumeral}\<number>,
you get the number expressed in lower-case ↑{roman numerals}. For example,
`|\romannumeral24|' produces `|xxiv|', a list of four tokens each having
category@12. The |\number| operation is redundant when it is applied
to an explicit constant (e.g., `|\number24|' produces `|24|'); but it does
suppress leading zeros, and it can also be used with numbers that are in
\TeX's internal registers or parameters. For example, `|\number-0015|'
produces `|-15|'; and if register |\count5| holds the value 316, then
`|\number\count5|' produces `|316|'.
\ddanger The twin operations ↑{*uppercase}|{|\<token list>|}| and
↑{*lowercase}|{|\<token list>|}| go through a given token list and convert
all of the character tokens to their ``uppercase'' or ``lowercase''
equivalents. Here's how: Each of the 128 possible characters
has two associated values called the ↑{*uccode} and the ↑{*lccode}; these values
are changeable just as a |\catcode| is. Conversion to uppercase means
that a character is replaced by its |\uccode| value, unless the |\uccode|
value is zero (when no change is made). Conversion to lowercase is
similar, using the |\lccode|. The category codes aren't changed. When
↑{.INITEX} begins, all |\uccode| and |\lccode| values are zero except that
the ↑{letters} |a| to@|z| and |A| to@|Z| have |\uccode| values |A| to@|Z|
and |\lccode| values |a| to@|z|.
\ddanger \TeX\ performs the |\uppercase| and |\lowercase| transformations
in its stomach, but the |\string| and |\number| and |\romannumeral|
and |\csname| operations are carried out en route to the stomach (like
macro expansion), as explained in Chapter@20.
\ddangerexercise What token list results from
`|\uppercase{a\lowercase{bC}}|'\thinspace?
\answer First |\uppercase| produces `|A\lowercase{BC}|'; then you get `|Abc|'.
\ddangerexercise \TeX\ has an internal integer parameter called ↑{*year} that is
set equal to the current year number at the beginning of every job. Explain how
to use |\year|, together with |\romannumeral| and |\uppercase|, to
print a copyright notice like
`\copyright\ \uppercase\expandafter{\romannumeral\the\year}'
for all jobs run in \number\the\year.
\answer |\copyright\ \uppercase\expandafter{\romannumeral\the\year}|
\ddangerexercise Define a control sequence |\appendroman| with three parameters
such that \hbox{|\appendroman#1#2#3|} defines control sequence |#1| to
expand to a control sequence whose name is the name of control sequence
|#2| followed by the value of the positive integer |#3| expressed in roman
numerals. For example, suppose |\count20| equals 30; then
`\hbox{|\appendroman\a\TeX{\count20}|}' should have the same effect as
`|\def\a{\TeXxxx}|'.↑(tricky macros)
\answer (We assume that parameter |#2| is not simply an active character.)
\begintt
\def\gobble#1{} % remove one token
\def\appendroman#1#2#3{\edef#1{\def#1{\csname
\expandafter\gobble\string#2\romannumeral#3\endcsname}}#1}
\endtt
\endchapter
Some bookes are to bee tasted,
others to bee swallowed,
and some few to bee chewed and disgested.
\author FRANCIS ↑{BACON}, {\sl Essayes\/} (1597) % p2 of orig edition
\bigskip
'Tis the good reader that makes the good book.
\author RALPH WALDO ↑{EMERSON}, {\sl Society \& Solitude\/} (1870) % Success
\eject
\beginchapter Chapter 8. The Characters\\You Type
A lot of different keyboards are used with \TeX, but few keyboards can
produce 128 different symbols. Furthermore, as we have seen, some of the
characters that you {\sl can\/} type on your ↑{keyboard} are reserved for
↑(terminal keyboard)
special purposes like escaping and grouping. Yet when we studied fonts it
was pointed out that there are 256 characters per font. So how can you
refer to the characters that aren't on your keyboard, or that have been
pre-empted for formatting?
One answer is to use control sequences. For example, the plain format
of Appendix@B\null, which defines |%| to be an end-of-line symbol so that you
can use it for comments, also defines the control sequence |\%| to mean
a ↑{percent sign}.
To get access to any character whatsoever, you can type
$$\dbox{|\char|\<number>\hss}$$
where \<number> is any number from 0 to 255 (optionally followed by a space);
you will get the corresponding character from the current font. That's how
Appendix@B handles |\%|; it defines `|\%|' to be an abbreviation for
`|\char37|\]', since 37 is the character code for a percent sign.
The codes that \TeX\ uses internally to represent characters are based on
``↑{ascii},'' the American Standard Code for Information Interchange.
↑(internal character codes) ↑(character codes)
Appendix@C gives full details of this code, which assigns numbers to
certain control functions as well as to ordinary letters and punctuation
marks. For example, ↑{<space}${}=32$ and ↑{<return}${}=13$.
There are 94@standard visible symbols, and they have been assigned code
numbers from 33 to@126, inclusive.
It turns out that `|b|' is character number 98 in ascii. So you can
typeset the word |bubble| in a strange way by putting
\begintt
\char98 u\char98\char98 le
\endtt
into your manuscript, if the |b|-key on your typewriter is out of order. \
(Of course you need the |\|, |c|, |h|, |a|, and |r| keys to type `↑{*char}',
so let's hope that they are always working.)
\danger \TeX\ always uses the internal character code of Appendix@C
for the standard ascii characters,
regardless of what external coding scheme actually appears in the files
being read. Thus, |b| is 98 inside of \TeX\ even when your computer
normally deals with ↑{EBCDIC} or some other non-ascii scheme; the \TeX\
software has been set up to convert text files to internal code, and to
convert back to the external code when writing text files.
Device-independent (↑{.dvi}) output files use \TeX's internal code. In
this way, \TeX\ is able to give identical results on all computers.
\danger Character code tables like those in Appendix@C often give the code
numbers in {\sl ↑{octal notation}}, i.e., the radix-8 number system, in which
the digits are {\it0},@{\it1}, {\it2}, {\it3}, {\it4}, {\it5}, {\it6},
and@{\it7}.\footnote*{The author of this manual likes to use italic digits
for octal numbers, and typewriter type for hexadecimal numbers, in order
to provide a typographic clue to the underlying radix whenever possible.}
Sometimes {\sl↑{hexadecimal notation}\/} is also used, in which case the
digits are |0|,@|1|, |2|, |3|, |4|, |5|, |6|, |7|, |8|, |9|, |A|, |B|, |C|,
|D|, |E|, and@|F|. For example, the octal code for `|b|' is {\it142}, and
its hexadecimal code is |62|. A ↑{<number} in \TeX's language can begin
with@a@|'|, in which case it is regarded as octal, or with a |"|, when it is
regarded as hexadecimal. Thus, |\char'142| and |\char"62| are equivalent
to |\char98|. The legitimate character codes in octal notation run from
\oct0 to \oct{377}; in hexadecimal, they run from \hex0 to \hex{FF}.
↑(apostrophe)↑(doublequote)
\danger But \TeX\ actually provides another kind of \<number> that makes it
unnecessary for you to know ascii at all! The token |`|$_{12}$, when followed
by any character token or by any control sequence token whose name is a
single character, stands for \TeX's internal code for the character in
question. For example, |\char`b| and |\char`\b| are also equivalent to
|\char98|. ↑(reverse apostrophe)
If you look in Appendix@B to see how |\%| is defined, you'll notice that
the definition is
\begintt
\def\%{\char`\%}
\endtt
instead of\/ |\char37| as claimed above.
\dangerexercise What would be wrong with |\def\%{\char`%}|?
\answer The |%| would be treated as a comment character, because its
category code is@14; thus, no |%| token or |}| token would get through
to the gullet of \TeX\ where numbers are treated. When a character is
of category 0, 5, 9, 14, or@15, the extra |\| must be used; and the
|\| doesn't hurt, so you can always use it to be safe.
\ddanger The preface to this manual points out that the author
makes little white lies from time to time. Well, if you actually
check Appendix@B you'll find that
\begintt
\chardef\%=`\%
\endtt
is the true definition of\/ |\%|. Since format designers often want to
associate a special character with a special control sequence name, \TeX\
provides the construction `↑{*chardef}\<control sequence>|=|\<number>'
for numbers between 0 and 255, as an efficient alternative to
`↑{*def}\<control sequence>|{\char|\<number>|}|'.
Although you can use |\char| to access any character in the current
font, you can't use it in the middle of a control sequence. For example,
if you type
\begintt
\\char98
\endtt
\TeX\ reads this as the control sequence |\\| followed by |c|, |h|, |a|,
etc., not as the control sequence |\b|.
You will hardly ever need to use |\char| when typing a manuscript, since
the characters you want will probably be available as predefined control
sequences; |\char| is primarily intended for the designers of book formats
like those in the appendices. But some day you may require a ↑{special
symbol}, and you may have to hunt through a font catalog until you find
it. Once you find it, you can use it by simply selecting the appropriate
font and then specifying the character number with |\char|. For example,
the ``↑{dangerous bend}'' sign used in this manual appears as character
number@127 of font ↑{.manfnt}, and that font is selected by the control
sequence ↑{:manual}. The macros in Appendix@E therefore display dangerous
bends by saying `|{\manual\char127}|'.
We have observed that the ascii character set includes only 94 printable
symbols; but \TeX\ works internally with 128 different character codes,
from 0 to 127, each of which is assigned to one of the sixteen categories
described in Chapter@7. If your keyboard has additional symbols, or if it
doesn't have the standard@94, the people who installed your local \TeX\ system
can tell you the correspondence between what you type and the character
number that \TeX\ receives. Some people are fortunate enough to have keys
marked `{\tt\rlap/=}' and `{\tt\rlap<\char'32}' and `{\tt\rlap>\char'32}';
it is possible to install \TeX\ so that it will recognize these handy symbols
and make the typing of mathematics more pleasant. But if you do not have
such keys, you can get by with the control sequences ↑{:ne}, ↑{:le},
and ↑{:ge}. ↑(not-equal)↑(less-or-equal)↑(greater-or-equal)
\danger \TeX\ has a standard way to refer to the invisible characters of ascii:
Code@0 can be typed as the sequence of three characters |↑↑@|, code@1 can
be typed |↑↑A|, and so on up to code@31, which is |↑↑_|\thinspace; you use
the characters |@|, |A|, \dots,@|_| (whose ascii equivalents are 64
to@95) to get codes that differ by 64. Also, code 127 can be typed |↑↑?|;
the dangerous bend sign could therefore be obtained by saying
`|{\manual↑↑?}|'. However, you must change the category code of character
127 before using it, since this character ordinarily has category@15
(invalid); say, e.g., `|\catcode127=12|'.
↑(double caret) ↑(caret caret)
The |↑↑| notation is different from |\char|, because |↑↑| combinations can
be used as if they were single characters; for example, it would not
be permissible to say |\catcode`\char127|, but |↑↑| symbols can even be
used as letters within control sequences.
\danger One of the overfull box messages in Chapter 6 illustrates the fact
that \TeX\ sometimes uses the funny |↑↑| convention in its output:
the umlaut character in that example appears as |↑↑S|, and the cedilla appears
as@|↑↑X|, because `\thinspace\char'23\thinspace' and `\char'30' occur in
positions \oct{23} and@\oct{30} of the ↑{:tenrm} font.
\danger Most of the |↑↑| codes are unimportant except in special applications.
But |↑↑M| is particularly noteworthy because it is code 13, the ascii
↑{<return} that \TeX\ places at the right end of every line of
your input file. By changing the category of |↑↑M| you can obtain useful
special effects, as we shall see later.
\danger The control code |↑↑I| is also of potential interest, since it's
the ascii ↑{<tab}. Plain \TeX\ makes \<tab> act like a blank space.
\ddanger People who install \TeX\ systems for use with non-American alphabets
are advised to use character codes less than 32 for any additional letters,
and to assign category@11 (letter) to those codes. For example, suppose
you have a ↑{Norwegian keyboard} that contains the letter {\tt\ae}.
↑(Scandinavian letters) ↑(foreign languages)
You could design your \TeX\ interface so that this letter comes in as
code@28,\footnote*{There's nothing magic about this number 28, except that
by coincidence the Computer Modern fonts of plain \TeX\ happen to have
an `\ae' in position@28 already. Some change to the font layout is inevitable,
however, since all six of the special letters \ae, \o, \aa, \AE,
\O, and \AA\ should be assigned to positions less than 32. Characters
already in those positions can easily be moved to positions greater than
127, since they are never accessed by plain \TeX\ except via control
sequences.} say, and your standard format package should define
|\catcode`|{\tt\ae}|=11|. Then you could have control sequences like
|\s|{\tt\ae}|rtrykk|; and your \TeX\ input files would be readable by
American installations of \TeX\ that don't have your keyboard, by
substituting |↑↑\| for character@28. \ (For example, the stated control
sequence would appear as |\s↑↑\rtrykk| in the file; your American
friends should also be provided with the format that you used, with its
|\catcode`↑↑\=11|.) \ Of course you should also arrange your fonts
so that \TeX's character 28 will print as {\oldninerm\ae}; and you should
change \TeX's hyphenation algorithm so that it will do correct
Norwegian hyphenation. The main point is that such changes are not
extremely difficult; nothing in the design of \TeX\ limits it to the
American alphabet, as long as you have at most 128 different characters.
↑(keyboards, non-ascii)
\danger But wait, you say. Why are characters numbered from 0 to@127,
when fonts can contain up to 256 different symbols? The answer is that
\TeX\ can access positions 128 to 255 of a font in several reasonably
convenient ways, even though its character tokens are coded from 0 to@127.
You can use |\char|, generally via a control sequence, as already
mentioned; and the higher positions of a font can conveniently be occupied
by math symbols, as we shall see later. Another important way to generate
codes above 127 is by sequences of keystrokes (i.e., ↑{ligatures}), when
the font has been set up properly. It is often faster to touch-type a
sequence of letters than to hunt for a single key on a large keyboard;
thus the restriction to 128 typable characters is not actually unreasonable.
\ddanger For example, let's consider Norwegian again, but suppose that you
want to use a keyboard without an {\tt\ae} character. You can arrange the
↑{font metric file} so that \TeX\ will interpret `|ae|' as a ligature that
produces `{\oldninerm\ae}'; and you could put the character
`{\oldninerm\ae}' in position 128 of the font. Similarly, you could define
ligatures `|aa|' and `|o/|' to produce `\aa' and `\o' in positions 129 and
130; and there would also be `|AE|', `|AA|', and `|O/|' for
`{\oldninerm\AE}', `\AA', and `\O' in positions 131 to 133. By setting
|\catcode`/=11| you would be able to use the ligature |o/| in control
sequences like `|\ho/yre|'. \TeX's hyphenation method is not confused by
ligatures; so you could use this scheme to operate essentially as above,
but with two keystrokes in place of one. \ (Your typists would have to
watch out for the occasional times when the adjacent characters |aa|,
|ae|, and |o/| should not be treated as ligatures; also, `|\/|' would be
a ↑{control word}, not a ↑{control symbol}.)
\ddanger The rest of this chapter is devoted to \TeX's reading rules,
which define the conversion from text to tokens. For example, the fact
that \TeX\ ignores spaces after control words is a consequence of
the rules below, which imply among other things that spaces after control
words never become space tokens. The rules are intended to work the
way you would expect them to, so you may not wish to bother reading them;
but when you are communicating with a computer, it is nice to understand
what the machine thinks it is doing, and here's your chance.
\ddanger Whenever \TeX\ is reading a line of text from a file, or a line of
text that you entered directly on your terminal, the reading apparatus is
in one of three so-called ↑{states}:
$$\displayvbox{\halign{State $#$\qquad\hfil&#\hfil\cr
\noalign{\vskip1pt}
N&Beginning a new line;\cr
M&Middle of a line;\cr
S&Skipping blanks.\cr
\noalign{\vskip-3pt}}}$$
At the beginning of the line it's in state $N$, but most of the time it's
in state $M$, and after a control sequence or a space it's in state $S$.
Incidentally, ``states'' are different from the ``↑{modes}'' that we will
be studying later; the current {\sl state\/} refers to \TeX's eyes and
mouth as they take in characters of new text, but the current {\sl mode\/}
refers to the condition of \TeX's gastro-intestinal tract. Most of the
things that \TeX\ does when it converts characters to ↑{tokens} are independent
of the current state, but there are differences when spaces or end-of-line
characters are detected (categories 10 and 5).
\ddanger \TeX\ deletes any ↑{<space} characters (number 32) that occur at the
right end of an input line. Then it inserts a ↑{<return} character (number@13)
at the right end of the line, except that it places nothing additional at the
end of a line that you inserted with `|I|'
during ↑{error recovery}. Note that \<return> is considered to be an actual
character that is part of the line; you can obtain special effects by
changing its catcode.
\ddanger If \TeX\ sees an escape character (category 0) in any state, it
scans the entire ↑{control sequence} name as follows. (a)@If there are no
more characters in the line, the name is empty (like |\csname\endcsname|).
↑(null control sequence) ↑(csname endcsname)
Otherwise (b)@if the next character is not of category@11 (letter), the
name consists of that single symbol. Otherwise (c)@the name consists of all
letters beginning with the current one and ending just before the first
nonletter, or at the end of the line. This name becomes a control sequence
token. \TeX\ goes into state@$S$ in case@(c), or in case@(b) with respect
to a character of category@10 (space); otherwise \TeX\ goes into state@$M$.
\ddanger If \TeX\ sees a superscript character (category 7) in any state,
and if that character is followed by another identical character, and if
those two equal characters are followed by a character whose internal
code is between 63 and 95 inclusive, these three characters are replaced
by a single character, whose code is obtained by adding or subtracting
64 from the code of the third character. \ (Thus, |↑↑A| is
replaced by a character whose code is@1, etc., as explained earlier.) \
This replacement is carried out also if such a trio of
characters is encountered during steps (b) or@(c) of the control-sequence-name
scanning procedure described above. After the replacement is made, \TeX\
begins again as if the new character had been present all the time.
If a superscript character is not the first of such a trio, it is
handled by the following rule.
\ddanger If \TeX\ sees a character of categories 1, 2, 3, 4, 6, 8, 11, or@12,
or a character of category@7 that is not the first of a trio as just
described, it converts the character to a token by attaching the category
code, and goes into state@$M$. This is the normal case; almost every
nonblank character is handled by this rule.
\ddanger If \TeX\ sees an end-of-line character (category 5), it throws
away any other information that might remain on the current line. Then if
\TeX\ is in state@$N$ (new line), the end-of-line character is converted
to the control sequence token `\cstok{par}' ↑(*par) (end of paragraph); if
\TeX\ is in state@$M$ (mid-line), the end-of-line character is converted
to a token for character@32 (`\]') of category@10 (↑{space}); and if \TeX\
is in state@$S$ (skipping blanks), the end-of-line character is simply dropped.
\ddanger If \TeX\ sees a character to be ignored (category@9), it simply
bypasses that character as if it weren't there, and remains in the same state.
\ddanger If \TeX\ sees a character of category@10 (space), the action
depends on the current state. If \TeX\ is in state $N$ or $S$, the
character is simply passed by, and \TeX\ remains in the same state.
Otherwise \TeX\ is in state $M$; the character is converted to a token
of category@10 whose character code is@32, and \TeX\ enters state@$S$.
The character code in a space token is always@32.
\ddanger If \TeX\ sees an active character (category 13), it converts the
character to a control sequence token and goes to state $M$. Control
sequences for active characters are independent of the control sequences
formed by an escape prefixed to a single character; e.g., |@| and
|\@| are distinct control sequences.
\ddanger If \TeX\ sees a comment character (category@14), it throws away that
character and any other information that might remain on the current line.
\ddanger Finally, if \TeX\ sees an invalid character (category@15),
it bypasses that character, prints an error message, and remains in the
same state.
\ddanger If \TeX\ has nothing more to read on the current line, it goes to
the next line (if any) and enters state $N$. An empty line is appended to
the end of every text file, unless an |\input| file already ends with an empty
line, or unless the file has been prematurely terminated by
↑{*endinput}. An ↑{empty line} has no characters, but a
\<return> is placed after it in the usual manner. Therefore
most files effectively end with the token `\cstok{par}'.
\ddangerexercise Test your understanding of \TeX's reading rules by answering
the following quickie questions: (a)@What is the difference between
categories 5 and@14? (b)@What is the difference between categories 3
and@4? (c)@What is the difference between categories 11 and@12? (d)@Are
spaces ignored after active characters? (e)@When a line ends with a comment
character like |%|, are spaces ignored at the beginning of the next line?
(f)@Can an ignored character appear in the midst of a control sequence name?
\answer (a)@Both characters terminate the current line; but a character of
category@5 might be converted into a space token or a \cstok{par} token, while
a character of category@14 never produces a token. (b)@They produce
character tokens stamped with different category numbers. For example,
|$|$_3$ is not the same token as |$|$_4$, so \TeX's digestive processes
will treat them differently. (c)@Same as@(b), plus the fact that control
sequence names treat letters differently. It turns out that \TeX's
digestive processes treat categories 11 and 12 identically, except that
the category code is significant in the ↑{*ifcat} and ↑{*ifx} tests and
when looking for the end of a macro argument. (d)@No. (e)@Yes; they're
ignored at the beginning of every line, since every line starts in
state@$N$. (f)@No.
\ddangerexercise Look again at the error messages that appear near the
end of Chapter@6. When \TeX\ reported that |\vship| was an undefined
control sequence, it printed two lines of context, showing that
it was in the midst of reading line@2 of the |story| file. At the
time of that error message, what state was \TeX\ in? What character
was it about to read next?
\answer \TeX\ had just read the control sequence |\vship|, so it
was in state@$S$, and it was just ready to read the space before `|1in|'.
Afterwards it ignored that space, since it was in state@$S$; but if
you had typed |I\obeyspaces| in response to that error message,
you would have seen the space. Incidentally, when \TeX\ prints
the ↑{context of an error message}, the bottom pair of lines comes from
a text file, but the other pairs of lines are portions of token lists
that \TeX\ is reading (unless they begin with `|<*>|', when they
represent text inserted during ↑{error recovery}).
\ddangerexercise Given the category codes of plain \TeX\ format,
what tokens are produced from the input line
`| $x↑2$@ \TeX ↑↑C|'\thinspace?
\answer |$|$_{3}$ |x|$_{11}$ |↑|$_7$ |2|$_{12}$ |$|$_{3}$ |@|$_{13}$ \]$_{10}$
\cstok{TeX} |↑↑C|$_{12}$ \]$_{10}$. The final space comes from the
\<return> placed at the end of the line. The character code
for |↑↑C| is@3.
\ddangerexercise Consider an input file that contains exactly
three lines; the first line says `|Hi!|', while the other two lines
are completely blank. What tokens are produced when \TeX\ reads
this file, using the category codes of plain \TeX\ format?
\answer |H|$_{11}$ |i|$_{11}$ |!|$_{12}$ \]$_{10}$ \cstok{par}
\cstok{par} \cstok{par}. The `\]' comes from the \<return> at the
end of the first line; the second and third lines each contribute
a \cstok{par}; and the final \cstok{par} comes from the additional blank
line inserted by \TeX\ at the end of each input file.
\ddangerexercise How can you insert a space with `|I|' while ↑{recovering}
from an error? \ (Remember that \TeX\ removes spaces that occur at the very
end of every line.) ↑(inserting online)
\answer Type `|I|\]|%|', assuming that |%| is a comment character (category@14).
\TeX\ is in state@$M$ when it reads the character following@`|I|'.
\ddangerexercise Assume that the category codes of plain \TeX\ are in
force, except that the characters |↑↑A|, |↑↑B|, |↑↑C|, |↑↑M| belong
respectively to categories 0, 7, 10, and 11. What tokens are produced from
the (rather ridiculous) input line `|↑↑B↑↑BM↑↑A↑↑B↑↑C↑↑M↑↑@\M|\]'?
(Remember that this line is followed by \<return>, which is
|↑↑M|; and recall that |↑↑@| denotes the ↑{<null} character, which has
category@9 when |INITEX| begins.)
\answer The two |↑↑B|'s are not recognized as consecutive superscript
characters (sigh), so the result is seven tokens: |↑↑B|$_7$ |↑↑B|$_7$
|M|$_{11}$ \cstok{\char'17\char'17B} \]$_{10}$ |↑↑M|$_{11}$ \cstok{M\char'17
\char'17M}. The last of these is a control word whose name has two letters.
The \<space> after |\M| is deleted before \TeX\ inserts the \<return> token.
\ddanger Since it is possible to change the category codes, \TeX\ might
actually use several different categories for the same character on a single
line. For example, Appendix@E contains several ways to coerce \TeX\ to
process text ``↑{verbatim},'' so that the author could prepare this manual
without great difficulty. \ (Try to imagine typesetting a \TeX\ manual;
backslashes and other special characters need to switch back and forth
between their normal categories and category@12!) \ Some care is needed to
get the timing right, but you can make \TeX\ behave in a variety of
different ways by judiciously changing the categories. On the other hand,
it is best not to play with the category codes very often, because you must
remember that characters never change their categories once they have become
tokens. For example, when the arguments to a macro are first scanned,
they are placed into a token list, so their categories are fixed once and
for all at that time. The author has intentionally kept the category
codes numeric instead of mnemonic, in order to discourage people from
making extensive use of\/ |\catcode| changes except in unusual
circumstances.
\ddangerexercise Appendix B defines ↑{:lq} and ↑{:rq} to be abbreviations
for |`| and |'| (single left and right quotes, respectively). Explain why
the definitions
\begintt
\chardef\lq=96 \chardef\rq=39
\endtt
would not be as good.
\answer Both alternatives work fine in text; in particular, they combine
as in |\lq\lq| to form ligatures. But the definition in Appendix@B works
also in connection with constants; e.g., |\char\lq\%| and
|\char\rq140| are valid. \ (Incidentally, the construction |\let\lq=`|
would not work with constants, since the quotes in a ↑{<number} must
come from character tokens of category@12; after |\let\lq=`| the control
sequence token |\lq| will not expand into a character token, nor {\sl is\/}
it a character token!) ↑(*let)
\endchapter
for life's not a paragraph
\quad
% he left a blank line here, really
And death i think is no parenthesis.
\author e.@e.@↑{cummings}, {\sl since feeling is first\/} (1926)
\bigskip
This coded character set is to facilitate
the general interchange of information
among information processing systems,
communication systems, and
associated equipment.
$\ldots$ An 8-bit set was considered
but the need for more than 128 codes
in general applications was not yet evident.
\author ASA SUBCOMMITTEE X3.2, {\sl American Standard\break %
Code for Information Interchange\/↑(ascii)} (1963)
% in {\sl Communications of the ACM\/}
\eject
\beginchapter Chapter 9. \TeX's Roman Fonts
When you're typing a manuscript for \TeX, you need to know what symbols
are available. The plain \TeX\ format of Appendix@B is based on the
Computer Modern fonts, which provide the characters needed to typeset a
wide variety of documents. It's time now to discuss what a person can do
with plain \TeX\ when typing straight text. We've already touched on some of
the slightly subtle things---for example, dashes and quotation marks
were considered in Chapter@2, and certain kinds of accents appeared in the
examples of Chapters 3 and@6. The purpose of this chapter is to give a
more systematic summary of the possibilities, by putting all the facts
together.
Let's begin with the rules for the normal roman font (|\rm| or |\tenrm|);
plain \TeX\ will use this font for everything unless you specify
otherwise. Most of the ordinary symbols that you need are readily available
and you can type them in the ordinary way: There's nothing special about
$$\openup{1pt}\halign{\indent#\hfil\cr
the letters |A| to |Z| and |a| to |z|\cr
the digits |0| to |9|\cr
common punctuation marks |: ; ! ? ( ) [ ] ` ' - * / . ,|\cr}$$
↑(letters) ↑(digits) ↑(punctuation)
except that \TeX\ recognizes certain combinations as ↑{ligatures}:
$$\openup{1pt}\halign{\indent#\hfil\ produces &#\hfil&
\qquad#\hfil\ produces &#\hfil&
\qquad#\hfil\ produces &#\hfil\cr
|ff|&ff;&|ffi|&ffi;&|``|&``\thinspace\cr
|fi|&fi;&|ffl|&ffl;&|''|&''\thinspace\cr
|fl|&fl;&|--|&--\thinspace;&|---|&---\thinspace.\cr}$$
You can also type |+|, |=|, |<|, and |>|, to get the corresponding
symbols +, =, <, and >; but it's much better to use these characters
only in math mode, i.e., enclosed between two |$| signs, since that tells
\TeX\ to insert the proper spacing for mathematics. Math mode is
explained later; for now, it's just a good idea to remember that formulas
and text should be segregated. A non-mathematical hyphen and a non-mathematical
slash should be specified by typing `|-|' and `|/|' outside of mathematics
mode, but subtraction and division should be specified by typing `|-|' and
`|/|' between |$|@signs.
↑(Colon)
↑(Semicolon)
↑(Exclamation point)↑(Shriek, see exclamation point)
↑(Question mark)
↑(Parentheses)
↑(Brackets)
↑(Apostrophe) ↑(Reverse apostrophe)
↑(Hamza, see apostrophe) ↑(Ain, see reverse apostrophe)
↑(Hyphen) ↑(Dash)
↑(Asterisk)
↑(Virgule, see slash)
↑(Solidus, see slash)
↑(Shilling sign, see slash)
↑(Slash)
↑(Period) ↑(Full stop, see period)
↑(Comma)
↑(Plus sign)
↑(Equals sign)
↑(Less than sign)
↑(Greater than sign)
The previous paragraph covers 81 of the 94 visible characters of standard
ascii; so your keyboard probably contains at least 13 more symbols, and
you should learn to watch out for the remaining ones, since they are special.
Five of these are pre\"empted by plain \TeX; if your manuscript requires
the symbols
\begintt
$ # % & @
\endtt
↑(dollar sign) ↑(sharp sign, see hash mark) ↑(number sign, see hash mark)
↑(hash mark) ↑(percent sign) ↑(ampersand) ↑(at sign)
you should remember to type them as
\begintt
\$ \# \% \& \@
\endtt
respectively. Plain \TeX\ also reserves the five symbols
\begintt
\ { } ↑ _
\endtt
↑(backslash) ↑(braces) ↑(curly braces, see braces) ↑(caret, see circumflex)
↑(circumflex) ↑(underline)
but you probably don't mind losing these, since they don't appear in
normal copy; braces and backslashes are available via control sequences
in math mode.
\goodbreak
There are three remaining special characters in the standard ascii set:
\begintt
" |vrt ~
\endtt
Again, you don't really want them. \ (Double-quote marks should be
replaced either by |``| or by |''|\thinspace; vertical lines are needed only
in math mode; tildes are needed only as accents.)
↑(double-quote mark) ↑(vertical line, see norm) ↑(norm symbol) ↑(tilde)
Scholarly publications in English often refer to other languages, so
plain \TeX\ makes it possible to typeset the most commonly used ↑{accents}:
$$\halign{\indent\hbox to 50pt{#\hfil}&\hbox to 35pt{#\hfil}&#\hfil\cr
\it\negthinspace Type&\it to get\cr
\noalign{\smallskip}
|\`o|&\`o&(grave accent)\cr
|\'o|&\'o&(acute accent)\cr
|\↑o|&\↑o&(circumflex or ``hat'')\cr
|\"o|&\"o&(umlaut or dieresis)\cr
|\~o|&\~o&(tilde or ``squiggle'')\cr
|\=o|&\=o&(macron or ``bar'')\cr
|\.o|&\.o&(dot accent)\cr
|\v o|&\v o&(h\'a\v cek or ``check'')\cr
|\u o|&\u o&(breve accent)\cr
|\H o|&\H o&(long Hungarian umlaut)\cr
|\t oo|&\t oo&(tie-after accent)\cr}$$
↑(:`) ↑(grave accent)
↑(:') ↑(acute accent)
↑(:caret) ↑(circumflex accent) ↑(hat accent)
↑(:") ↑(umlaut accent) ↑(dieresis)
↑(:tilde) ↑(tilde accent) ↑(squiggle accent)
↑(:=) ↑(macron accent) ↑(bar accent)
↑(:.) ↑(dot accent)
↑(:v) ↑(h\'a\v cek accent) ↑(check accent)
↑(:u) ↑(breve accent)
↑(:H) ↑(Hungarian umlaut)
↑(:t) ↑(tie-after accent)
↑(embellished letters, see accents)
Within the font, such accents are designed to appear at the right height
for the letter `o'; but you can use them over any letter, and \TeX\ will
raise an accent that is supposed to be taller. Notice that spaces are needed
in the last four cases, to separate the control sequences from the letters
that follow. You could, however, type `|\H{o}|' in order to avoid putting a
space in the midst of a word.
\medbreak
Plain \TeX\ also provides three accents that go underneath:
$$\halign{\indent\hbox to 50pt{#\hfil}&\hbox to 35pt{#\hfil}&#\hfil\cr
\it\negthinspace Type&\it to get\cr
\noalign{\smallskip}
|\c o|&\c o&(cedilla accent)\cr
|\d o|&\d o&(dot-under accent)\cr
|\b o|&\b o&(bar-under accent)\cr}$$
↑(:c) ↑(cedilla accent)
↑(:d) ↑(dot-under accent) ↑(emphatics, see dot-under)
↑(:b) ↑(bar-under accent)
And there are a few special letters:
$$\halign{\indent\hbox to 50pt{#\hfil}&\hbox to 35pt{\oldtenrm#\hfil}&#\hfil\cr
\it\negthinspace Type&\it to get\cr
\noalign{\smallskip}
|\oe,\OE|&\oe,\thinspace\OE&(French ligature OE)\cr
|\ae,\AE|&\ae,\thinspace\AE&(Latin and Scandinavian ligature AE)\cr
|\aa,\AA|&\aa,\thinspace\AA&(Scandinavian A-with-circle)\cr
|\o,\O|&\o,\thinspace\O&(Scandinavian O-with-slash)\cr
|\l,\L|&\l,\thinspace\L&(Polish suppressed-L)\cr
|\ss|&\ss&(German ``es-zet'' or sharp S)\cr}$$
↑(Scandinavian letters) ↑(sharp S) ↑(es-zet) ↑(German) ↑(Polish)
↑(Norwegian) ↑(Danish) ↑(Swedish) ↑(Icelandic) ↑(suppressed-L)
The |\rm| font contains also the ↑{dotless letters} `\i' and `\j',
which you can obtain by typing `↑{*i}' and `↑{*j}'. These are needed because
`i' and `j' should lose their dots when they gain an accent. For example,
the right way to obtain `m\=\i n\u us' is to type \hbox{`|m\=\i n\u us|'}
or `|m\={\i}n\u{u}s|'.
This completes our summary of the |\rm| font. Exactly the same conventions
apply to |\bf|, |\sl|, and |\it|, so you don't have to do things differently
when you're using a different typeface. For example, |\bf\"o| yields
{\bf\"o} and |\it\&| yields {\it\&}. Isn't that nice?
\danger However, |\tt| is slightly different. You will be glad to know that
|ff|, |fi|, and so on are not treated as ligatures when you're using
↑{typewriter type}; nor do you get ligatures from dashes and quote marks.
That's fine, because ordinary dashes and ordinary double-quotes are
appropriate when you're trying to imitate a typewriter. Most of the
accents are available too. But |\H|, |\l|, |\L|, and |\t| cannot be
used---the typewriter font contains other symbols in their place.
\ (See Appendix@F\null.) \ All of the letters, spaces, and other symbols in
|\tt| have the same width.
\exercise What's the non-naive way to type `na\"\i ve'\thinspace?
\answer |na\"\i ve| or |na{\"\i}ve| or |na\"{\i}ve|.
\exercise List some English words that contain accented letters.
\answer Belov\`ed prot\'eg\'e r\↑ole co\"ordinator: souffl\'es, cr\↑epes,
p\↑at\'es, etc.
\exercise How would you type `{\oldtenrm
\AE sop's \OE uvres} en fran\c cais'\thinspace?
\answer |\AE sop's \OE uvres en fran\c cais|.
\exercise Explain what to type in order to get the sentence
\line{{\sl Commentarii Academi\ae\ Petropolitan\ae\/} is now
{\sl Akademi\t\i a Nauk SSSR, Doklady}.}
\answer |{\sl Commentarii Academi\ae\ Petropolitan\ae\/} is now|\hfil\break
|{\sl Akademi\t\i a Nauk SSSR, Doklady}.|
\exercise And how would you specify the names
Ernesto ↑{Ces\`aro},
P\'al ↑{Erd\H os},
\O ystein ↑{Ore},
Stanis\l aw \'Swierczkowski, ↑(Swiercz...)
Serge\u\i\ \t Iur'ev, ↑(Iur'ev)
Mu\d hammad ibn M\↑us\↑a ↑{al-Khw\↑arizm\↑\i}?
\answer |Ernesto Ces\`aro,
P\'al Erd\H os,
\O ystein Ore,
Stanis\l aw \'Swier%|\break|czkowski,
Serge\u\i\ \t Iur'ev,
Mu\d hammad ibn M\↑us\↑a al-Khw\↑arizm\↑\i.|
\dangerexercise Devise a way to typeset {\tt P\'al Erd{\bf\H{\tt o}}s}
in typewriter type.
\answer The proper umlaut is |\H|, which isn't available in |\tt|, so
it's necessary to borrow the accent from another font. For example,
\hbox{|{\tt P\'al Erd{\bf\H{\tt o}}s}|}.
The following symbols come out looking exactly the same whether you are using
|\rm|, |\sl|, |\bf|, |\it|, or |\tt|:
$$\halign{\indent#\hfil\ &\hfil#\hfil&#\hfil\cr
\it\negthinspace Type&\it to get\cr
\noalign{\smallskip}
|\dag|&\dag&(dagger or obelisk)\cr
|\ddag|&\ddag&(double dagger or diesis)\cr
|\section|&\section&(section number sign)\cr
|\P|&\P&(paragraph sign or pilcrow)\cr
|\copyright|&\copyright&(copyright sign)\cr
|\sterling|&\sterling&(British pound sign)\cr}$$
↑(dagger) ↑(double dagger) ↑(obelisk) ↑(obelus, see obelisk) ↑(diesis)
↑(section number sign) ↑(paragraph sign) ↑(pilcrow) ↑(copyright sign)
↑(British pound sign) ↑(pound sterling)
(They appear in just one style because plain \TeX\ gets them from the
math symbols font. Lots of other symbols are needed for mathematics;
we shall study them later.)
\ddanger Appendix B shows that plain \TeX\ handles most of the accents
by using \TeX's ↑{*accent} primitive. For example, |\'#1| is equivalent
to |{\accent14 #1}|, where |#1| is the argument being accented.
The general rule is that |\accent|\<number> puts
an accent over the next character; the \<number> tells where that accent
appears in the current font. The accent is assumed to be properly
positioned for a character whose height equals the ↑{x-height} of the
current font; taller or shorter characters cause the accent to be raised
or lowered, taking due account of the slantedness of the fonts of accenter
and accentee. The width of the final construction is the width of the
character being accented, regardless of the width of the accent.
Mode-independent commands like font changes may appear between the accent
number and the character to be accented, but grouping operations must not
intervene. If it turns out that no suitable character is present, the
accent will appear by itself as if you had said |\char|\<number> instead
of\/ |\accent|\<number>. For example, |\'{}| produces \'{}.
\ddangerexercise Why do you think plain \TeX\ defines |\'#1| to be
`|{\accent14 #1}|' instead of simply letting |\'| be an abbreviation
for `|\accent14 |'\thinspace? \ (Why the extra
braces, and why the argument |#1|?)
\answer The extra braces keep font changes local. An argument makes the
use of\/ |\'| more consistent with the use of other accents like |\.|, which
are manufactured from other characters without using the |\accent|
primitive.
\ddanger It's important to remember that these conventions we have discussed
for accents and special letters are not built into \TeX\ itself; they belong
only to the plain \TeX\ format, which uses the Computer Modern fonts. Quite
different conventions will be appropriate when other fonts are involved;
format designers should provide rules for how to obtain accents and
special characters in their particular systems. Plain \TeX\ works well
enough when accents are infrequent, but the conventions of this chapter
are by no means recommended for large-scale applications of \TeX\ to
other languages. For example, a well-designed \TeX\ font for ↑{French}
would probably treat accents as ligatures, so that one could |e'crire
de cette nai"ve manie`re en franc/ais| without backslashes. (See the
remarks about Norwegian in Chapter@8.)
↑(foreign languages)
\endchapter
Let's doo't after the high Roman fashion.
\author WILLIAM ↑{SHAKESPEARE}, {\sl Anthony and Cleopatra\/} (1606)
% Act IV, Scene 13, line 87
\bigskip
English is a straightforward, frank, honest, open-hearted, no-nonsense language,
which has little truck with such devilish devious devices as accents;
indeed U.S. editors and printers are often thrown into a dither
when a foreign word insinuates itself into the language.
However there is one word on which Americans seem to have closed ranks,
printing it confidently, courageously, and almost invariably
complete with accent---the cheese presented to us as M\"unster.
\smallskip
Unfortunately, ↑{Munster} doesn't take an accent.
\author WAVERLEY ↑{ROOT}, in the {\sl International Herald Tribune\/} (1982)
% Tuesday 18 May 82 page 8
\eject
\beginchapter Chapter 10. Dimensions
Sometimes you want to tell \TeX\ how big to make a space, or how wide to
make a line. For example, the short story of Chapter@6 used the instruction
`|\vskip .5cm|' to skip vertically by half a centimeter, and we also
said `|\hsize=4in|' to specify a horizontal size of 4@inches. It's time now
to consider the various ways such ↑{dimensions} can be communicated to \TeX.
``↑{Points}'' and ``↑{picas}'' are the traditional units of measure for
printers and compositors in English-speaking countries, so \TeX\
understands points and picas. \TeX\ also understands inches and metric
units, as well as the continental European versions of points and picas.
Each unit of measure is given a two-letter abbreviation, as follows:
↑(units of measure, table)
$$\halign{\indent\tt#&\quad#\hfil\cr
pt&point (baselines in this manual are $12\pt$ apart)\cr
pc&pica ($\rm1\,pc=12\,pt$)\cr
in&inch ($\rm1\,in=72.27\,pt$)\cr
bp&big point ($\rm72\,bp=1\,in$)\cr
cm&centimeter ($\rm2.54\,cm=1\,in$)\cr
mm&millimeter ($\rm10\,mm=1\,cm$)\cr
dd&did\↑ot point ($\rm1157\,dd=1238\,pt$)\cr
cc&cicero ($\rm1\,cc=12\,dd$)\cr
sp&scaled point ($\rm65536\,sp=1\,pt$)\cr}$$
↑(.pt)↑(point)
↑(.pc)↑(pica)
↑(.in)↑(inch)
↑(.bp)↑(big point)
↑(.cm)↑(centimeter)
↑(.mm)↑(millimeter)
↑(.dd)↑(did\↑ot point)↑(Did\↑ot, Fran\c cois Ambroise)
↑(.cc)↑(cicero)
↑(.sp)↑(scaled point)
The output of \TeX\ is firmly grounded in the metric system, using the
conversion factors shown here as exact ratios.
\exercise How many points are there in 254 centimeters?
\answer Exactly $7227\pt$.
When you want to express some physical dimension to \TeX, type it as
$$\displaybox{\<optional sign>\<number>\<unit of measure>}$$
or
$$\displaybox{\<optional sign>\<digit string>|.|\<digit string>\<unit
of measure>}$$
where an ↑{<optional sign} is either a `|+|' or a `|-|' or nothing at all,
and where a ↑{<digit string} consists of zero or more consecutive
decimal digits. The `|.|' can also be a `|,|'.
For example, here are some typical dimensions:
$$\halign{\indent#\hfil&\hskip 6em#\hfil\cr
|3 in|&|29 pc|\cr
|-.013837in|&|+ 42,1 dd|\cr
|0.mm|&|123456789sp|\cr}$$
A plus sign is redundant, but some people occasionally like extra
redundancy once in a@while. Blank spaces are optional before the signs and the
numbers and the units of measure, and you can also put an optional space
after the dimension; but you should not put spaces within the digits
of a number or between the letters of the unit of measure.
\exercise Arrange those six ``typical dimensions'' into order,
from smallest to largest.
\answer $\rm-.013837\,in$, $\rm0.\,mm$, $\rm+42.1\,dd$, $\rm3\,in$,
$\rm29\,pc$, $\rm123456789\,sp$.
\ (The lines of text in this manual are 29@picas wide.)
\dangerexercise Two of the following three dimensions are legitimate
according to \TeX's rules. Which two are they? What do they mean?
Why is the other one incorrect?
\begintt
'.77pt
"Ccc
-,sp
\endtt
\answer The first is not allowed, since octal notation cannot be used with
a decimal point. The second is, however, legal, since a \<number> can be
hexadecimal according to the rule mentioned in Chapter@8; it means
$\rm12\,cc$, which is $\rm144\,dd\approx154.08124\,pt$. The third is also
accepted, since a \<digit string> can be empty; it is a complicated
way to say $\rm0\,sp$.
The following ``rulers'' have been typeset by \TeX\ so that you can get
some idea of how different units compare to each other. If no distortion
has been introduced during the camera work and printing processes that
have taken place after \TeX\ did its work, these rulers are highly accurate.
% Display of four rulers (inches, points, didot points, centimeters), each
% built from a horizontal rule with tick marks hanging below it.
$$ \abovedisplayskip 15pt plus 4pt minus 4pt
\belowdisplayskip 15pt plus 4pt minus 4pt
\vbox{
% \1, \2, \3, \4: tick marks of zero height and depth 2pt, 4pt, 6pt, 8pt.
\def\1{\vrule height 0pt depth 2pt}
\def\2{\vrule height 0pt depth 4pt}
\def\3{\vrule height 0pt depth 6pt}
\def\4{\vrule height 0pt depth 8pt}
% \ruler#1#2#3: an \hrule over an hbox holding a leading \4 tick followed
% by the ticks #1; the label `#2 #3' (quantity, unit) follows in \rm.
\def\ruler#1#2#3{\leftline{$\vcenter{\hrule\hbox{\4#1}}\quad\rm#2{#3}$}}
% \\#1 puts tick #1 at the right edge of a box one tick-spacing wide;
% \8 is one repeating group of ticks.  Both are redefined for each ruler.
% Inches: spacing .125in, eight ticks per group, four groups = 4in.
\def\\#1{\hbox to .125in{\hfil#1}}
\def\8{\\\1\\\2\\\1\\\3\\\1\\\2\\\1\\\4}
\ruler{\8\8\8\8}4{in}
\vskip 18pt
% Points: spacing 10pt, ten ticks per group, three groups = 300pt.
\def\\#1{\hbox to 10pt{\hfil#1}}
\def\8{\\\1\\\1\\\1\\\1\\\2\\\1\\\1\\\1\\\1\\\4}
\ruler{\8\8\8}{300}{pt}
\vskip 18pt
% Didot points: same tick pattern as the point ruler, spacing 10dd.
\def\\#1{\hbox to 10dd{\hfil#1}}
\def\8{\\\1\\\1\\\1\\\1\\\2\\\1\\\1\\\1\\\1\\\4}
\ruler{\8\8\8}{300}{dd}
\vskip 18pt
% Centimeters: spacing 5mm, two ticks per group, ten groups = 10cm.
\def\\#1{\hbox to 5mm{\hfil#1}}
\def\8{\\\2\\\4}
\ruler{\8\8\8\8\8\8\8\8\8\8}{10}{cm}
\vskip 6pt}$$
\dangerexercise (To be worked after you know about boxes and glue and have
read Chapter@21.) \ Explain how to typeset such a $\rm10\,cm$ ↑{ruler},
using \TeX.
\answer {\obeylines|\def\tick#1{\vrule height 0pt depth #1pt}|
|\def\\{\hbox to 1cm{\hfil\tick4\hfil\tick8}}|
|\vbox{\hrule\hbox{\tick8\\\\\\\\\\\\\\\\\\\\}}|
\noindent(You might also try putting ticks at every millimeter, in order %
to see how good your system is; %
some output devices can't handle 101@rules all at once.)}
\danger \TeX\ represents all dimensions internally as an integer multiple
of the tiny units called sp. Since the wavelength of visible light is
approximately $\rm100\,sp$, % in fact: violet=75sp, red=135sp!
rounding errors of a few sp make no difference to the eye.
However, \TeX\ does all of its arithmetic very carefully so that
identical results will be obtained on different computers. Different
implementations of \TeX\ will produce the same line breaks and the same
page breaks when presented with the same document, because the integer
arithmetic will be the same.
↑(machine-independence) ↑(rounding)
\danger The units have been defined here so that precise conversion to@sp
is@efficient on a wide variety of machines. In order to achieve this,
\TeX's ``pt'' has been made slightly larger than the official printer's
point, which was defined to equal exactly $\rm.013837\,in$ by the American
Typefounders Association in@1886 [cf.@National Bureau of Standards
Circular@570 (1956)]. In fact, one classical point is exactly
$.99999999\pt$, so the ``error'' is essentially one part in $10↑8$.
This is more than two orders of magnitude less than the amount by which
the inch itself changed during 1959, when it shrank to $\rm2.54\,cm$ from
its former value of $\rm(1/0.3937)\,cm$; so there is no point in worrying
about the difference. The new definition $\rm72.27\,pt=1\,in$ is not only
better for calculation, it@is also easier to remember.
\danger \TeX\ will not deal with dimensions whose absolute value is
$\rm2↑{30}\,sp$ or more. In other words, the ↑{maximum legal dimension} is
slightly less than $16384\pt$. This is a distance of about 18.892 feet
(5.7583 meters), so it won't cramp your style.
In a language manual like this it is convenient to use ``↑{angle brackets}''
in abbreviations for various constructions like \<number> and \<optional
sign> and \<digit string>. Henceforth we shall use the term ↑{<dimen} to
stand for a legitimate \TeX\ dimension. For example,
$$\displaybox{|\hsize=|\<dimen>}$$
will be the general way to define the column width that \TeX\ is supposed
to use. The idea is that \<dimen> can be replaced by any quantity like
`|4in|' that satisfies \TeX's grammatical rules for dimensions;
abbreviations in angle brackets make it easy to state such laws of grammar.
When a dimension is zero, you have to specify a unit of measure even
though the unit is irrelevant. Don't just say `|0|'\thinspace; say `|0pt|' or
`|0in|' or something.
\smallbreak
The 10-point size of type that you are now reading is normal in textbooks,
but you probably will often find yourself wanting a larger font. Plain \TeX\
makes it easy to do this by providing {\magnifiedfiverm ↑{magnif{}ied
output}}. If you say
\begintt
\magnify{1200}
\endtt
at the beginning of your manuscript, everything will be enlarged by 20\%;
i.e., it will come out at 1.2 times the normal size. Similarly,
`|\magnify{2000}|' doubles everything; this actually quadruples the area of
each letter, since heights and widths are both doubled. To magnify a
document by the factor $f$, you say ↑{:magnify}|{|$n$|}|, where $n$@is
1000@times@$f$. This instruction must be given before the first page of output
has been completed. You cannot apply two different magnifications to the same
document.
Magnification has obvious advantages: You'll have less ↑{eyestrain} when
you're ↑{proofreading}; you can easily make ↑{transparencies} ↑(slides)
for lectures; and you can photo-reduce magnified output, in order to minimize
the deficiencies of a ↑{low-resolution printer}. Conversely, you might
even want to say `|\magnify{500}|' in order to create a ↑{pocket-size}
version of some book. ↑(squint print) But there's a slight catch:
You can't use magnification unless your printing device happens to have the
fonts that you need at the magnification you desire. In other words, you need
to find out what sizes are available before you can |\magnify|. Most
installations of \TeX\ make it possible to print all the fonts of plain
\TeX\ at three or more different magnifications, but the use of large fonts
can be expensive because a lot of system memory space is often required
to store the shapes.
\exercise Try printing the short story of Chapter@6 at 1.2, 1.5, and 2.0
times the normal size. What should you type to get \TeX\ to do this?
\answer For example, say `|\magnify{1200} \input story \end|'. Three
separate runs are needed, since there can be at most one magnification
per job. The output may look funny if the fonts don't exist at the
stated magnifications.
\danger When you say |\magnify{2000}|, an operation like `|\vskip.5cm|' will
actually skip $\rm1.0\,cm$ of space in the final document. If you
want to specify a dimension in terms of the final size, \TeX\ allows
you to say `↑{.true}' just before |pt|, |pc|, |in|, |bp|, |cm|, |mm|,
|dd|, |cc|, and |sp|. This unmagnifies the units, so that the subsequent
magnification will cancel out. For example, `|\vskip.5truecm|' is
equivalent to `|\vskip.25cm|' if you have previously said
`|\magnify{2000}|'. Plain \TeX\ uses this feature in the |\magnify|
command itself: Appendix@B includes the instruction
\begintt
\hsize = 6.5 true in
\endtt
just after a new magnification has taken effect. This adjusts the line width
so that the material on each page will be $6{1\over2}$ inches wide when it
is finally printed, regardless of the magnification factor.
There will be an inch of margin at both left and right,
assuming that the paper is $8{1\over2}$ inches wide.
\danger If you use no `|true|' dimensions, \TeX's internal computations are not
affected by the presence or absence of magnification; line breaks and page
breaks will be the same, and the ↑{.dvi} file will change in only two places.
\TeX\ simply tells the printing routine that you want a certain magnification,
and the printing routine will do the actual enlargement when it reads the
|dvi| file.
\dangerexercise Chapter@4 mentions that fonts of different magnifications
can be used in the same job, by loading them `↑{.at}' different sizes.
Explain what fonts will be used when you say `|\magnify{1500}
\font\first=cmr10 at 12pt \font\second=cmr10 at 12truept|'.
↑(magnified fonts)
\answer Font |\first| will be cmr10 at $18\pt$ after magnification;
font |\second| will be cmr10 at $12\pt$. \ (\TeX\ changes
`|12truept|' into `|8pt|', and the final output magnifies it back to
$12\pt$.)
\ddanger Magnification is actually governed by \TeX's ↑{*mag} primitive,
which is an integer parameter that should be positive and at@most@32768.
The value of\/ |\mag| is examined in three cases: (1)@just before the
first page is shipped to the |dvi| file; (2)@when computing a |true|
dimension; (3)@when the |dvi| file is being closed. Alternatively,
some implementations of \TeX\ produce non-|dvi| output; they examine
|\mag| in case@(2) and when shipping out each page. The value of\/ |\mag|
must not change after it has first been examined.
\danger Sometimes \TeX's built-in units (|pt|, |cm|, etc\null.) aren't
really right for your application. You can always make up a new unit
of measure by saying
$$\displaybox{|\varunit=|\<dimen>}$$
after which you can give dimensions in `↑{.vu}'. For example, after
`↑{*varunit}|=.11111pt|', the instruction `|\vskip 2.5vu|' will skip
vertically by $0.277775\pt$. \TeX\ also recognizes two other
context-dependent units of measure:
$$\halign{\indent#\hfil\cr
|em| is the width of a ``quad'' in the current font;\cr
|ex| is the ``x-height'' of the current font.\cr}$$
↑(.em) ↑(quad) ↑(.ex) ↑(x-height)
Each font defines its own em and ex values. In olden days, an ``em'' was
the width of an `M', but this is no longer true; ems are simply arbitrary
units that come with a font, and so are exes. The Computer Modern fonts
have the property that an em-dash is one em wide, each of the ↑{digits} 0
to@9 is half an em wide, and lower-case `x' is one ex high; but these are
not hard-and-fast rules for all fonts.
The |\rm| font (↑{.cmr10}) of plain \TeX\ has $\rm1\,em=10\,pt$
and $\rm1\,ex\approx4.3\,pt$; the |\bf| font (↑{.cmbx10}) has
$\rm1\,em=11.5\,pt$ and $\rm1\,ex\approx4.44\,pt$; and the |\tt| font
(↑{.cmtt}) has $\rm1\,em=10.5\,pt$ and $\rm1\,ex\approx4.3\,pt$. All of
these are ``10-point'' fonts, yet they have different em and ex values.
It@is generally best to use |em| for horizontal measurements and |ex| for
vertical measurements that depend on the current font.
\danger \TeX\ has several families of internal registers that we will be
discussing later; for now it will suffice to give a hint about what is
to@come. A \<dimen> can depend on \TeX's registers if one of the following
codes is used instead of a unit of measure:
$$\halign{\indent{\tt#}\<number> refers to the current &#\hfil\cr
dm&value of a |\dimen| register;\cr
ht&height of a |\box| register;\cr
wd&width of a |\box| register;\cr
dp&depth of a |\box| register.\cr}$$
↑(.dm) ↑(*dimen) ↑(.ht) ↑(.wd) ↑(.dp) ↑(*box) ↑(registers) ↑(*count)
You can use |ht|, |wd|, |dp| to make decisions based on the sizes of
boxes; for example, `|.5wd3|' is half the width of\/ |\box3|. If
$\hbox{|\count20|}=5$ and $\hbox{|\dimen5|}=30\pt$, then `|\vskip
1.5dm\count20|' will skip vertically by $45\pt$.
\danger Notice that the unit names in dimensions
are not preceded by backslashes. The same is true of other so-called
↑{keywords} of the \TeX\ language. Keywords can be given in upper-case letters
or in a mixture of upper and lower case; e.g., `|Pt|' is equivalent to `|pt|'.
The category codes of these letters are irrelevant; you may, for example,
be using a |p| of category@12 (otherchar) that was generated by expanding
`|\the\hsize|' as explained in Chapter@20.
\TeX\ gives a special interpretation to keywords only when they
appear in certain very restricted contexts. For example, `|pt|' is a
keyword only when it appears after a number in a \<dimen>;
`|at|' is a keyword only when it appears after the external name of a
font in a |\font| declaration.
Here is a complete list of \TeX's keywords, in case you are wondering about
the full set: |at|, |bp|, |by|, |cc|, |cm|, |dd|, |depth|, |dm|,
|dp|, |em|, |ex|, |expand|, |fil|, |fill|, |filll|, |height|, |ht|,
|in|, |minus|, |mm|, |mu|, |pc|, |plus|, |pt|, |sp|, |to|, |true|, |vu|,
|wd|, |width|. ↑(reserved words)
See Appendix@I for references to the contexts in which each of these is
recognized as a keyword.
\danger A \<dimen> can also refer to \TeX's dimension or glue parameters.
For example, `|\the\hsize|' stands for the current value of\/ |\hsize|;
`|\the\baselineskip|', when used in the context of a \<dimen>,
stands for the current |\baselineskip|,
ignoring its stretchability or shrinkability. For full details about
`↑{*the}', see Chapter@20.
\endchapter
The methods that have hitherto been taken
to discover the measure of the Roman foot,
will, upon examination, be found so unsatisfactory, that
it is no wonder the learned are not yet agreed on that point.
$\ldots$
9 London inches are equal to 8,447 Paris inches.
\author MATTHEW ↑{RAPER}, in {\sl Philosophical Transactions\/} (1760)
% ``An Enquiry into the Measure of the {\sl Roman\/} Foot,''
% {\sl Philos.\ Trans.\ \bf51} (1760), 774--823.
\bigskip
\checkequals\sesame\count0 %
Without the letter U,
units would be nits.
\author ↑{SESAME STREET}{↑(Children's Television Workshop)} (1970)
\eject
\beginchapter Chapter 11. Boxes
\TeX\ makes complicated pages by starting with simple individual characters
and putting them together in larger units, and putting these together in still
larger units, and so on. Conceptually, it's a big paste-up job. The \TeX nical
terms used to describe such page construction are {\sl ↑{boxes}\/} and
{\sl ↑{glue}}.
Boxes in \TeX\ are two-dimensional things with a rectangular shape, having
three associated measurements called {\sl↑{height}}, {\sl↑{width}}, and
{\sl↑{depth}}. Here is a picture of a typical box, showing its so-called
↑{reference point} and ↑{baseline}:
% Figure: a sample box with its reference point, baseline, width, height
% and depth labeled.  Sizes such as `1wd0' and `1ht2' are dimensions taken
% from box registers (one times the width of \box0, the height of \box2).
% \samplebox, \arrows and \dbox are not defined in this part of the file.
$$\eightpoint
% box0: an up-arrow; box1: a \mid bar in a box exactly as wide as box0.
\setbox0=\hbox{$\uparrow$}
\setbox1=\hbox to 1wd0{$\hss\mid\hss$} % with luck, they'll line up
% box2: the vertical `height' arrow -- up-arrow, two bar segments, the
% label moved left by 1em, two more bar segments, and a down-arrow.
\setbox2=\vbox{\copy0
\nointerlineskip \kern-.5pt \copy1
\nointerlineskip \kern-.5pt \copy1
\moveleft 1em\hbox{height}
\copy1 \nointerlineskip \kern-.5pt
\copy1 \nointerlineskip \kern-.5pt
\hbox{$\downarrow$}
\kern.2pt}
% box3: the shorter `depth' arrow -- up-arrow, label, down-arrow.
\setbox3=\vbox{\kern.2pt\copy0
\moveleft 1em\hbox{depth}
\hbox{$\downarrow$}
\kern0pt}
% box4: the sample box itself (presumably \samplebox takes height, depth,
% width -- TODO confirm against its definition), with the word `Baseline'
% overlaid 3pt above the baseline and a `width' arrow underneath.
\setbox4=\vtop{\kern-3pt % this cancels the null text above the samplebox
\hbox{\samplebox{1ht2}{1ht3}{6em}{}%
\kern-6em
\raise3pt\hbox to 6em{\hss Baseline\hss}}
\kern3pt
\arrows{6em}{width}}
% Final assembly: the `Reference point' label lowered by the axis height,
% then box4 raised by the height of box2, then box2 stacked over box3.
\dbox{\setbox0=\hbox{$\vcenter{}$}% 1ht0 is the axis height
\lower1ht0\hbox{Reference point$-$\kern-.2em$\rightarrow$\kern2pt}%
\raise1ht2\box4
\kern1.5em
\raise1ht2\vtop{\kern0pt\box2\nointerlineskip\box3}\hss}$$
From \TeX's viewpoint, a single character from a font is a box; it's one
of the simplest kinds of boxes. The font designer has decided what the
height, width, and depth of the character are, and what the symbol will
look like when it is in the box; \TeX\ uses these dimensions to paste
boxes together, and ultimately to determine the locations of the reference
points for all characters on a page. In plain \TeX's |\rm| font (cmr10), for
example, the letter `h' has a height of 6.9444 points, a width of 5.5556
points, and a depth of zero; the letter `g' has a height of 4.3055
points, a width of 5 points, and a depth of 1.9444 points. Only certain
special characters like parentheses have height plus depth actually equal
to 10 points, although ↑{cmr10} is said to be a ``10-point'' font. You needn't
bother to learn these measurements yourself, but it's good to be aware of
the fact that \TeX\ deals with such information; then you can better
understand what the computer does to your manuscript.
The character shape need not fit inside the boundaries of its box. For example,
some characters that are used to build up larger math symbols like matrix
brackets intentionally protrude a little bit, so that they overlap
properly with the rest of the symbol. Slanted letters frequently extend a
little to the right of the box, as if the box were skewed right at the top
and left at the bottom, keeping its baseline fixed. For example, compare
the letter `g' in the cmr10 and ↑{cms10} fonts (|\rm| and |\sl|):
\figure{40pt}{(A figure will be inserted here; too bad you can't see it now.
It shows two g's, as claimed.)}
In both cases \TeX\ thinks that the box is 5 points wide, so both letters get
exactly the same treatment. \TeX\ doesn't have any idea where the ink will
go---only the output device knows this. But the slanted letters will be
spaced properly in spite of \TeX's lack of knowledge, because the baselines
will match up.
Actually the font designer also tells \TeX\ one other thing, the so-called
{\sl↑{italic correction}\/}: A number is specified for each character,
telling roughly how far that character extends to the right of its box
boundary, plus a little to spare. For example,
the italic correction for `g' in cmr10 is $0.1389\pt$, while in cms10 it is
$0.8565\pt$. Chapter@4 points out that this correction is added to the
normal width if you type `↑{*/}' just after the character. You should remember
to use |\/| when shifting from a slanted font to an unslanted one, especially
in cases like
\begintt
the so-called {\sl italic correction\/}:
\endtt
since no space intervenes here to compensate for the loss of slant.
\smallbreak
\TeX\ also deals with another simple kind of box, which might be called
a@``↑{black box},'' namely, a rectangle like
`\thinspace \vrule width 4pt height 6pt depth 1.5pt \thinspace'
that is to be entirely filled with ink at printing time. You can specify any
height, width, and depth you like for such boxes---but they had better not have
too much area, or the printer might get upset. \ (Printers generally
prefer white space to black space.)
Usually these black boxes are made very skinny, so that they appear as
horizontal lines or vertical lines. Printers traditionally call such lines
``↑{horizontal rules}'' and ``↑{vertical rules},'' so the terms \TeX\ uses
to stand for black boxes are ↑{*hrule} and ↑{*vrule}. Even when the box is
square, as in `\thinspace\bull\thinspace', you must call it either an@|\hrule|
or a@|\vrule|. We shall discuss the use of ↑{rule boxes} in greater
detail later. \ (See Chapter@21.)
\smallbreak
Everything on a page that has been typeset by \TeX\ is made up of simple
character boxes or rule boxes, pasted together in combination. \TeX\
pastes boxes together in two ways, either {\sl horizontally\/} or {\sl
vertically}. When \TeX\ builds a ↑{horizontal list} of boxes, it lines
them up so that their reference points appear in the same horizontal row;
therefore the baselines of adjacent characters will match up as they
should. Similarly, when \TeX\ builds a ↑{vertical list} of boxes, it lines
them up so that their reference points appear in the same vertical column.
% Here are some macros for making blank boxes
% (they are used below to show the box outline of each character of a text;
% \maketypebox, \makelightbox and \endlist are not defined in this part of
% the file).
% \dolist#1 saves its argument in \next.  If \next matches \endlist (by
% \ifx) the loop stops; otherwise \\ processes it and \dolist is called
% again on the following argument.
\def\dolist#1{\def\next{#1}%
\ifx\next\endlist \let\next\relax
\else \\\let\next\dolist \fi
\next}
% \\ handles the item held in \next: a control space if \next matches
% \space, otherwise the item is boxed in \box0 and \maketypebox is called
% (presumably it draws the outline of \box0 -- TODO confirm).
\def\\{\ifx\next\space\ \else \setbox0=\hbox{\next}\maketypebox\fi}
% \demobox#1 sets \box0 to the hbox built by running \dolist over #1, puts
% down a copy of it, backs up by its width, and calls \makelightbox
% (presumably an outline for the whole box -- TODO confirm).
\def\demobox#1{\setbox0=\hbox{\dolist#1\endlist}%
\copy0\kern-1wd0\makelightbox}
Let's take a look at what \TeX\ does behind the scenes, by comparing the
computer's methods with what you would do if you were setting metal type
by hand. In the time-tested traditional method, you choose the letters that
you@need out of a type case---the upper-case letters are in the ↑{upper
case}---and you put them into a ``↑{composing stick}.'' When a line is
complete, you adjust the spacing and transfer the result to the ``chase,''
where it joins the other rows of type. Eventually you lock the type up
tightly by adjusting external wedges called ``quoins.'' This isn't much
different from what \TeX\ does, except that different words are used; when
\TeX\ locks up a line, it creates what is called an ``↑{hbox}''
(↑{horizontal box}), because the components of the line are pieced
together horizontally. You can give an instruction like
\begintt
\hbox{A line of type.}
\endtt
in a \TeX\ manuscript; this tells the computer to take boxes for the appropriate
letters in the current font and to lock them up in an hbox. As far as \TeX\ is
concerned, the letter `A' is a box
`\thinspace\setbox0\hbox{A}\maketypebox\thinspace'
and the letter `p' is a box
`\thinspace\setbox0\hbox{p}\maketypebox\thinspace'.
So the given instruction causes \TeX\ to form the hbox
$$\displaybox{\demobox{A line of type.}}$$
representing `A line of type.' The hboxes for individual lines of type are
eventually joined together by putting them into a ``↑{vbox}'' (↑{vertical
box}). For example, you can say
\begintt
\vbox{\hbox{Two lines}\hbox{of type.}}
\endtt
and \TeX\ will convert this into
$$\setbox0=\vbox{\hbox{\demobox{Two lines}}\hbox{\demobox{of type.}}}
\displaybox{$\vcenter{\hbox{\makelightbox\kern-1wd0\box0}}$\qquad
i.e.,\qquad$\vcenter{\vbox{\hbox{Two lines}\hbox{of type.}}}$}$$
The principal difference between \TeX's method and the old way is that metal
types are generally cast so that each character has the same height and
depth; this makes it easy to line them up by hand. \TeX's types have
variable height and depth, because the computer has no trouble lining
characters up by their baselines, and because the extra information about
height and depth helps in the positioning of accents and mathematical
symbols.
Another important difference between \TeX\ setting and hand setting is, of
course, that \TeX\ will choose line divisions automatically; you don't
have to insert ↑{*hbox} and ↑{*vbox} instructions unless you want to
retain complete control over where each letter goes. On the other hand,
if you do use |\hbox| and |\vbox|, you can make \TeX\ do almost everything
that Ben ↑{Franklin} could do in his printer's shop. You're only giving
up the ability to make the letters come out charmingly crooked or badly
inked; for such effects you need to make a new font. \ (And of course you
lose the tactile and olfactory sensations, and the thrill of
doing everything by yourself. \TeX\ will never completely replace the
good@old@ways.)
A page of text like the one you're reading is itself a box, in \TeX's view:
It is a largish box made from a vertical list of smaller boxes representing
the lines of text. Each line of text, in turn, is a box made from a
horizontal list of boxes representing the individual characters. In more
complicated situations, involving mathematical formulas and/or complex
tables, you can have boxes within boxes within boxes $\ldots$ to any level.
But even these complicated situations arise from horizontal or vertical lists
of boxes pasted together in a simple way; all that you and \TeX\ have to
worry about is one list of boxes at a time. In fact, when you're typing
straight text, you don't have to think about boxes at all, since \TeX\ will
automatically take responsibility for assembling the character boxes into
words and the words into lines and the lines into pages. You only need to be
aware of the box concept when you want to do something out of the ordinary,
e.g., when you want to center a heading.
\danger From the standpoint of \TeX's digestive processes, a manuscript
comes in as a sequence of tokens, and the tokens are to be transformed into
a sequence of boxes. Each token of input is essentially an instruction or
a piece of an instruction; for example, the token `|A|$_{11}$' normally means,
``put a character box for the letter |A| at the end of the current hbox,
using the current font''; the token `\cstok{vskip}' normally means, ``skip
vertically in the current vbox by the \<glue> specified in the
following tokens.''
\danger The height, width, or depth of a box might be negative, in which
case it is a ``shadow box'' that is somewhat hard to draw. \TeX\ doesn't
balk at ↑{negative dimensions}; it just does arithmetic as usual. For example,
the combined width of two adjacent boxes is the sum of their widths, whether
or not the widths are positive. A font designer can declare a character's
width to be negative, in which case the character acts like a backspace. \
(Languages that read from right to left could be
handled in this way, but only to a limited extent, since \TeX's line-breaking
↑(Hebrew) ↑(Arabic)
algorithm is based on the assumption that words don't have negative widths.)
\danger \TeX\ can raise or lower the reference points of individual boxes
in a horizontal list. Such adjustments take care of mathematical
subscripts and superscripts, as well as the heights of accents and a few
other things. For example, here is a way to make a box that contains
the \TeX\ logo, putting it into \TeX's internal register |\box0|:
\begintt
\setbox0=\hbox{T\kern-.1667em\lower.5ex\hbox{E}\kern-.125em X}
\endtt
↑(*setbox)
Here `↑{*kern}|-.1667em|' means to insert blank space of $-.1667$ ems in the
current font, i.e., to back up a bit; and `↑{*lower}|.5ex|' means that
the box |\hbox{E}| is to be lowered by half of the current x-height, thus
offsetting that box with respect to the others. Instead of
`|\lower.5ex|' one could also say `↑{*raise}|-.5ex|'. Chapters 12 and@21
discuss the details of how to construct boxes for special effects;
our goal in the present chapter is merely to get a taste of the
possibilities.
\danger \TeX\ will exhibit the contents of any ↑{box register}, if you
ask it to. For example, if you type `↑{*showbox}|0|' after setting
|\box0| to the \TeX\ logo as above, your ↑{log file} will contain
the following mumbo jumbo: ↑(TeX logo)
\begintt
\hbox(6.83333+2.15277)x18.61073
.\tenrm T
.\kern -1.66702
.\hbox(6.83333+0.0)x6.80554, shifted 2.15277
..\tenrm E
.\kern -1.25
.\tenrm X
\endtt
↑(diagnostic format) ↑(internal box-and-glue representation) ↑(box displays)
The first line means that |\box0| is an hbox whose height, depth, and width
are respectively $6.83333\pt$, $2.15277\pt$, and $18.61073\pt$.
Subsequent lines beginning with `|.|' indicate that they are {\sl inside\/}
of a box. The first thing in this particular box is the letter@|T| in
font |\tenrm|; then comes a kern. The next item is an hbox that contains
only the letter@|E|; this box has the height, depth, and width of an |E|, and
it has been shifted downward by $2.15277\pt$ (thereby accounting for
the depth of the larger box).
\dangerexercise Why are there two dots in the `|..\tenrm E|' line here?
\answer This |E| is inside a box that's inside a box.
\danger Such displays of box contents will be discussed further in
Chapters 12 and@17.
They are used primarily for diagnostic purposes, when you are trying to figure
out exactly what \TeX\ thinks it's doing. The main reason for bringing them
up in the present chapter is simply to provide a glimpse of how \TeX\ represents
boxes in its guts. A computer program doesn't really move boxes around; it
fiddles with lists of representations of boxes.
\dangerexercise By running \TeX, figure out how it actually handles italic
corrections to characters: how are the corrections represented inside a box?
\answer The idea is to construct a box and to look inside. For example,
\begintt
\setbox0=\hbox{\sl g\/} \showbox0
\endtt
reveals that |\/| is implemented by placing a kern after the character.
Further experiment shows that this kern is inserted even when the italic
correction is zero.
\dangerexercise The ``opposite'' of \TeX's logo---namely,
T\kern+.1667em\raise.5ex\hbox{E}\kern+.125em X---is produced by
\begintt
\setbox1=\hbox{T\kern+.1667em\raise.5ex\hbox{E}\kern+.125em X}
\endtt
What would |\showbox1| show now? \ (Try to guess, without running the machine.)
\answer The height, depth, and width of the enclosing box should be just large
enough to enclose all of the contents, so the result is:
\begintt
\hbox(8.9861+0.0)x24.44478
.\tenrm T
.\kern 1.66702
.\hbox(6.83333+0.0)x6.80554, shifted -2.15277
..\tenrm E
.\kern 1.25
.\tenrm X
\endtt
(You probably predicted a width of |24.44477|; \TeX's internal calculations are
in |sp|, not |pt|/100000, so the rounding in the fifth decimal place is not
readily predictable.)
\dangerexercise Why do you think the author of \TeX\ didn't make boxes more
symmetrical between horizontal and vertical, by allowing reference points
to be inside the boundary instead of insisting that the reference point
must appear at the left edge of each box?
\answer No applications of such symmetrical boxes to English-language
printing were apparent; it seemed pointless to carry extra generality
as useless baggage that would rarely if ever be used, merely for the sake of
symmetry. In other words, the author wore a computer science cap instead
of a mathematician's mantle on the day that \TeX's boxes were born.
Time will tell whether or not this was a fundamental error!
\ddangerexercise Construct a |\demobox| macro for use in writing manuals
like this, so that an author can write `|\demobox{Tough exercise.}|'
in order to typeset `\thinspace\demobox{Tough exercise.}\thinspace'.
\answer The following solution is based on a general |\makeblankbox|
macro that prints the edges of a box using rules of given thickness
outside and inside that box; the box dimensions are those of\/ |\box0|.
It is assumed that the macros of Appendix@E are already present.\par
|\def\dolist#1{\def\next{#1}%|\parbreak
| \ifx\next\endlist \let\next\relax|\parbreak
| \else \\\let\next\dolist \fi|\parbreak
| \next}|\par
|\def\hidehrule#1#2{\kern-#1\hrule height#1 depth#2 \kern-#2 }|\par
|\def\hidevrule#1#2{\kern-#1{\setdimen0=#1|\parbreak
| \advdimen0 by#2\vrule width1dm0}\kern-#2 }|\par
|\def\makeblankbox#1#2{\hbox{\lower1dp0\vbox{\hidehrule{#1}{#2}%|\parbreak
| \kern-#1 % overlap the rules at the corners|\parbreak
| \hbox to 1wd0{\hidevrule{#1}{#2}%|\parbreak
| \raise1ht0\vbox to #1{}% set the vrule height|\parbreak
| \lower1dp0\vtop to #1{}% set the vrule depth|\parbreak
| \hfil\hidevrule{#2}{#1}}%|\parbreak
| \kern-#1\hidehrule{#2}{#1}}}}|\par
|\def\maketypebox{\makeblankbox{0pt}{1pt}}|\par
|\def\makelightbox{\makeblankbox{.2pt}{.2pt}}|\par
|\def\\{\ifx\next\space\ |\parbreak
| \else \setbox0=\hbox{\next}\maketypebox\fi}|\par
|\def\demobox#1{\setbox0=\hbox{\dolist#1\endlist}%|\parbreak
| \copy0\kern-1wd0\makelightbox}|\par
\endchapter
I have several boxes in my memory
in which I will keep them all very safe,
% he's talking about "instructions"
there shall not a one of them be lost.
\author IZAAK ↑{WALTON}, {\sl The Compleat Angler\/} (1653) % beginning Chap12
% in 1654 and subsequent editions, this quote comes in Chap17
% the 1653 spelling agrees with 20th century conventions in this passage!
\bigskip
How very little does the amateur, dwelling at home at ease,
comprehend the labours and perils of the author.
\author R. L. ↑{STEVENSON} and L. ↑{OSBOURNE}, {\sl The Wrong Box\/} (1889)
\eject
\beginchapter Chapter 12. Glue
But there's more to the story than just boxes: there's also some magic mortar
called {\sl ↑{glue}\/} that
\TeX\ uses to paste boxes together. For example, there is a little space between
the lines of text in this manual; it has been calculated so that the baselines
of consecutive lines within a paragraph are exactly 12@points apart. And
there is space between words too; such space is not an ``empty'' box, it is
part of the glue between boxes. This glue can stretch or shrink so that the
right-hand margin of each page comes out looking straight.
↑(leading, see baselineskip) ↑(skipping space, see glue)
When \TeX\ makes a large box from a horizontal or vertical list of smaller
boxes, there often is glue between the smaller boxes. Glue has three
attributes, namely its natural {\sl space}, its ability to {\sl ↑{stretch}}, and
its ability to {\sl ↑{shrink}}.
In order to understand how this works, consider the following example of
four boxes in a horizontal list separated by three globs of glue:
$$\eightpoint
\dbox{\vbox{
\hbox{\samplebox{7mm}{8mm}{5vu}{width 5}%
\sampleglue{9vu}{space 9\cr stretch 3\cr shrink 1}%
\samplebox{3mm}{2mm}{6vu}{width 6}%
\sampleglue{9vu}{space 9\cr stretch 6\cr shrink 2}%
\samplebox{8mm}{3mm}{3vu}{width 3}%
\sampleglue{12vu}{space 12\cr stretch 0\cr shrink 0}%
\samplebox{4mm}{7mm}{8vu}{width 8}}
\kern6pt
\arrows{52vu}{width 52}}\hss}$$
The first glue element has 9 units of space, 3 of stretch, and 1 of shrink;
the next one also has 9 units of space, but 6 units of stretch and 2 of
shrink; the last one has 12 units of space, but it is unable to stretch
or to shrink, so it will remain 12 units of space no matter what.
The total width of boxes and glue in this example, considering only the
space components of the glue, is $5+9+6+9+3+12+8=52$ units. This is called
the {\sl ↑{natural width}\/} of the horizontal list; it's the preferred way to
paste the boxes together. Suppose, however, that \TeX\ is told to make the
horizontal list into a box that is 58@units wide; then the glue has to
stretch by 6@units. Well, there are $3+6+0=9$ units of stretchability present,
so \TeX\ multiplies each unit of stretchability by 6/9 in order to obtain the
extra 6@units needed. The first glob of glue becomes $9+(6/9)\times3=11$
units wide, the next becomes $9+(6/9)\times6=13$ units wide, the last remains
12 units wide, and we obtain the desired box looking like this:
$$\eightpoint
\dbox{\vbox{\kern-3pt
\hbox{\samplebox{7mm}{8mm}{5vu}{}%
\sampleglue{11vu}{$9+2$}%
\samplebox{3mm}{2mm}{6vu}{}%
\sampleglue{13vu}{$9+4$}%
\samplebox{8mm}{3mm}{3vu}{}%
\sampleglue{12vu}{$12+0$}%
\samplebox{4mm}{7mm}{8vu}{}}
\kern6pt
\arrows{58vu}{width 58}}\hss}$$
On the other hand, if \TeX\ is supposed to make a box 51 units wide from the
given list, it is necessary for the glue to shrink by a total of one unit. There
are three units of shrinkability present, so the first glob of glue would
shrink by 1/3 and the second by 2/3.
\smallbreak
The process of determining glue thickness when a box is being made from a
horizontal or vertical list is called {\sl ↑{setting the glue}}. Once glue has
been set, it becomes rigid; it won't stretch or shrink any more, and the
resulting box is essentially indecomposable.
Glue will never shrink more than its stated shrinkability. For example,
the first glob of glue in our illustration will never be allowed to become
narrower than 8 units wide, and \TeX\ will never shrink the given
horizontal list to make its total width less than 49 units. But glue is
allowed to stretch arbitrarily far, whenever it has a positive stretch
component.
\exercise How wide would the glue globs be if the horizontal list in the
illustration were to be made 100 units wide?
\answer $9+16$ units, $9+32$ units, $12+0$ units. \ (But \TeX\ would
consider so much stretching to be ``infinitely bad.'')
Once you understand \TeX's concept of glue, you may well decide that it
was misnamed; real glue doesn't stretch or shrink in such ways, nor does it
contribute much space between boxes that it welds together. Another word
like ``spring'' would be much closer to the essential idea, since ↑{springs}
have a natural width, and since different springs compress and expand at
different rates under tension. But whenever the author has suggested
changing \TeX's terminology, numerous people have said that they like the
word ``glue'' in spite of its inappropriateness; so the original name has
stuck.
\danger \TeX\ is somewhat reluctant to stretch glue more than the stated
stretchability; therefore you can decide how big to make each aspect of the
glue in some layout by using the following rules: \ (a)@The natural
glue space should be the amount of space that looks best. \ (b)@The glue
stretch should be the maximum amount of space that can be added to the natural
spacing before the layout begins to look bad. \ (c)@The glue shrink should
be the maximum amount of space that can be subtracted from the natural spacing
before the layout begins to look bad.
In most cases the designer of a book layout will have specified all the kinds
of glue that are to be used, so a typist will not need to decide how big any
glue attributes should be. For example, users of the plain \TeX\ format of
Appendix@B can type `|\smallskip|' when they want a little extra ↑{space
between paragraphs}; a ↑{:smallskip} turns out to be $3\pt$ worth of
vertical glue that can stretch or shrink by an additional@$1\pt$. Here is
a |\smallskip|: \smallskip
\noindent
Instead of sprinkling various amounts of glue throughout a manuscript,
expressing each of them explicitly in terms of points, you will find it
much better to explain your intentions more clearly by typing something
like `|\smallskip|' when you want abnormal spacing. The definition of\/
|\smallskip| can readily be changed later, in case you want such spaces to
be smaller or larger. Plain \TeX\ also provides you with `↑{:medskip}',
which is worth two smallskips, and `↑{:bigskip}', which is worth two medskips.
\danger A plain \TeX\ |\medskip| appears before and after each
``↑{dangerous bend}'' section of this manual, so you have already seen
numerous examples of such spacing before you knew what it was called.
Vertical glue is created by writing `|\vskip|\<glue>', where ↑{<glue} is
any glue specification. The usual way to specify \<glue> to \TeX\ is
$$\displaybox{\<dimen> |plus|\<dimen> |minus|\<dimen>}$$
where the `|plus|\<dimen>' and `|minus|↑{<dimen}' are optional and assumed
to be zero if not present; `↑{.plus}' introduces the amount of
stretchability, `↑{.minus}' introduces the amount of shrinkability.
For example, Appendix@B defines |\medskip| to be an abbreviation for
`|\vskip6pt plus2pt minus2pt|'. The normal-space component of glue must
always be given as an explicit \<dimen>, even when it is zero.
\danger Horizontal glue is created in the same way, but with ↑{*hskip}
instead of\/ ↑{*vskip}. For example, plain \TeX\ defines ↑{:enskip} as an
abbreviation for the command `|{\hskip.5em}|'; this skips horizontally by
one ``↑{en},'' i.e., by half of an em in the current font. There is no
stretching or shrinking in an |\enskip|. Notice the braces that appear
in `|{\hskip.5em}|'; they prevent \TeX\ from thinking that a ↑{keyword}
is present, in case the text following |\enskip| just happens to begin
with `|plus|' or `|minus|'.
One of the interesting things that happens when glue stretches and shrinks
at different rates is that there might be glue with {\sl ↑{infinite}\/}
stretchability. For example, consider again the four boxes we had at the
beginning of this chapter, with the same glue as before except that the
glue in the middle can stretch infinitely far. Now the total
stretchability is infinite; and when the line has to grow, all of the
additional space is put into the middle glue. If, for example, a box of
width 58 is desired, the middle glue expands from 9 to@15 units, and the
other spacing remains unchanged.
If such infinitely stretchable glue is placed at the left of a row of boxes,
the effect is to place them ``flush right,'' i.e., to move them over to the
↑(right justification) ↑(centering) ↑(flush right)
rightmost boundary of the constructed box. And if you take {\sl two\/} globs of
infinitely stretchable glue, putting one at the left and one at the right, the
effect is to {\sl center\/} the list of boxes within a larger box. This in fact
is how the ↑{:centerline} instruction works in plain \TeX: it places infinite
glue at both ends, then makes a box whose width is the current value of
|\hsize|.
The short story example of Chapter 6 used infinite glue not only for
centering, but also in the ↑{*vfill} instruction at the end; `|\vfill|'
essentially means ``skip vertically by zero, but with infinite stretchability.''
In other words, |\vfill| fills up the rest of the current page with
blank space.
\danger \TeX\ actually recognizes several kinds of infinity, some of which
are ``more infinite'' than others. You can say both ↑{*vfil} and |\vfill|;
the second is stronger than the first. In other words, if no other
infinite stretchability is present, |\vfil| will expand to fill the remaining
space; but if both |\vfil| and |\vfill| are present simultaneously,
the |\vfill| effectively prevents |\vfil| from stretching. You can think
of it as if\/ |\vfil| has one mile of stretchability, while |\vfill| has
a trillion miles.
\danger Besides |\vfil| and |\vfill|, \TeX\ has ↑{*hfil} and ↑{*hfill},
for stretching indefinitely in the horizontal direction. You can also say
↑{*hss} or ↑{*vss}, in order to get glue that is infinitely shrinkable as
well as infinitely stretchable. \ (`|\hss|' stands for ``horizontal
stretch or shrink''; `|\vss|' is its vertical counterpart.) \ Finally, the
primitives ↑{*hfilneg} and ↑{*vfilneg} will cancel the stretchability of
|\hfil| and |\vfil|; we will discuss applications of these curious glues later.
\danger Here are some examples of\/ |\hfil|, using the ↑{:line} macro of
plain \TeX, which creates an hbox whose width is the current |\hsize|:
↑(flush left)
\begintt
\line{This text will be flush left.\hfil}
\line{\hfil This text will be flush right.}
\line{\hfil This text will be centered.\hfil}
\line{Some text flush left\hfil and some flush right.}
\line{Alpha\hfil centered between Alpha and Omega\hfil Omega}
\endtt
\dangerexercise Describe the result of \xdef\linexno{\the\count\exno}%
\begintt
\line{\hfil\hfil What happens now?\hfil}
\line{\hfill\hfil and now?\hfil}
\endtt
\answer `What happens now?' is placed in a line of width |\hsize|, with
twice as much space at the left as at the right; `and now?' is put flush right
on the following line.
\ddangerexercise How do the following three macros behave differently?
\begintt
\def\centerlinea#1{\line{\hfil#1\hfil}}
\def\centerlineb#1{\line{\hfill#1\hfill}}
\def\centerlinec#1{\line{\hss#1\hss}}
\endtt
\answer The first two give an ``overfull box'' if the argument doesn't fit
on a line; the third allows the argument to stick out into the margins
instead. \ (Plain \TeX's ↑{:centerline} is |\centerlinec|; the stickout effect
shows up in the narrow-column experiment of Chapter@6.) \ If the argument
contains no infinite glue, |\centerlinea| and |\centerlineb| produce the same
effect; but |\centerlineb| will center an argument that contains `fil' glue.
\danger In order to specify such infinities, you are allowed to use
the special units `↑{.fil}', `↑{.fill}', and `↑{.filll}' in the \<dimen>
parts of a stretchability or shrinkability component. For example,
|\vfil|, |\vfill|, |\vss|, and |\vfilneg| are essentially equivalent
to the glue specifications
\begintt
\vskip 0pt plus 1fil
\vskip 0pt plus 1fill
\vskip 0pt plus 1fil minus 1fil
\vskip 0pt plus -1fil
\endtt
respectively. It's usually best to stick to the first order infinity
(fil) as much as you can, resorting to second order (fill) only when
you really need something extremely infinite. Then the ultimate order (filll) is
always available as a last resort in emergencies. \ (\TeX\ does not
provide a `↑{:vfilll}' primitive, since the use of this highest infinity
is not encouraged.) \ You can use fractional multiples of infinity like
`|3.25fil|', as long as you stick to fewer than 16384 fil units. \TeX\
actually does its calculations with integer multiples of $\rm2↑{-16}\,fil$
(or fill or filll); so |0.000007filll| turns out to be indistinguishable
from |0pt|, but |0.00001filll| is infinitely greater than |16383.99998fill|.
Now here's something important for all \TeX nical typists to know:
Plain \TeX\ puts extra space at the end of a ↑{sentence}; furthermore,
it automatically increases the stretchability (and decreases
the shrinkability) after ↑{punctuation} marks. The reason is that it's
usually better to put more space after punctuation than between two ordinary
words, when spreading a line out to reach the desired margins. Consider, for
example, the following sentences from a classic kindergarten pre-primer:
↑(Dick and Jane)
\begintt
``Oh, oh!'' cried Baby Sally. Dick and Jane laughed.
\endtt
If \TeX\ sets this at its natural width, all the spaces will be the same,
except after the quote and after `Baby Sally.':
$$\displaybox{``Oh, oh!'' cried Baby Sally. Dick and Jane laughed.}$$
But if the line needs to be expanded by 5 points, 10 points, 15 points, or more,
\TeX\ will set it as
$$\halign{\indent#\hfil\cr
\hbox expand 5pt{``Oh, oh!'' cried Baby Sally. Dick and Jane laughed.}\cr
\hbox expand 10pt{``Oh, oh!'' cried Baby Sally. Dick and Jane laughed.}\cr
\hbox expand 15pt{``Oh, oh!'' cried Baby Sally. Dick and Jane laughed.}\cr
\hbox expand 20pt{``Oh, oh!'' cried Baby Sally. Dick and Jane laughed.}\cr}$$
The glue after the comma stretches at 1.25 times the rate of the
glue between adjacent words; the glue after the period and after the |!''|
stretches at 3 times the rate. There is no glue between adjacent letters,
so individual words will always look the same. If \TeX\ had to shrink
this line to its minimum width, the result would be
$$\hfuzz 1000pt % suppress overfull box message
\displaybox{\hbox to 0pt{``Oh, oh!'' cried Baby Sally.
Dick and Jane laughed.}}$$
The glue after a ↑{comma} shrinks only 80 per cent as much as ordinary
inter-word glue, and after a ↑{period} or ↑{exclamation point} or
↑{question mark} it shrinks by only one third as much.
This all makes for nice-looking output, but it unfortunately adds a bit
of a burden to your job as a typist, because \TeX's rule for determining
the end of a sentence {\sl doesn't always work}. The problem is that
a period sometimes comes in the middle of a sentence $\ldots$ like when it
is used (as here) to make an ``↑{ellipsis}'' of three dots.
↑(three dots, see ellipsis) ↑(dot dot dot, see ellipsis)
Moreover, if you try to specify `$\ldots$' by typing three periods in
a row, you get `...'---the dots are too close together. The best way to
handle this is to go into {\sl mathematics\/} mode, using the ↑{:ldots}
control sequence defined in plain \TeX\ format. For example, if you type
\begintt
Hmmm $\ldots$ I wonder why?
\endtt
the result is `Hmmm $\ldots$ I wonder why?'. This works because
math formulas are exempt from the normal text spacing rules.
Chapter 18 has more to say about |\ldots| and related topics.
↑{Abbreviations} present problems too. For example, the short story in
Chapter@6 referred to `Mr.@↑{Drofnats}'; \TeX\ must be told somehow that the
period after `Mr.'\ or `Mrs.'\ or `Ms.'\ or `Prof.'\ or `Dr.'\ or `Rt.@Hon.',
etc., doesn't count as a sentence-ending ↑{full stop}.
We avoided that embarrassment in Chapter@6 by typing `|Mr.@Drofnats|';
the ``↑{tie}'' mark |@| ↑(at sign) tells plain \TeX\ to insert a normal
space, and to refrain from breaking between lines at that space. Another way
to get \TeX\ to put out a normal space is to type `|\|\]' (↑{escape
space}); e.g., `|Mr.\ Drofnats|' would be almost the same as `|Mr.@Drofnats|',
except that a line might end after the `Mr.'.
The tie mark is best for abbreviations within a name, and after several
other common abbreviations like `Fig.'\ and `cf.'\ and `vs.'\ and `resp.';
you will find that it's easy to train yourself to type `|cf.@Fig.@5|'.
In fact, it's usually wise to type |@| (instead of a space) just after a common
abbreviation that occurs in the middle of a sentence.
Manuals of style will tell you that the abbreviations `e.g.'\ and `i.e.'\
should always be followed by commas, never by spaces, so those particular
cases shouldn't need any special treatment.
The only remaining abbreviations that arise with significant frequency
occur in bibliographic references; ↑{escape-spaces} are appropriate here.
↑(interword spacing)
If, for example, you are typing a manuscript that refers to
`Proc.\ Amer.\ Math.\ Soc.', you should say
\begintt
Proc.\ Amer.\ Math.\ Soc.
\endtt
Granted that this input looks a bit ugly, it makes the output look right.
It's one of the things we occasionally must do when dealing with a computer
that tries to be smart.
\exercise Explain how to type the following sentence: ``Mr.@\& Mrs.@↑{User}
were married by Rev.@↑{Drofnats}, who preached on
Matt.@19\thinspace:\thinspace3--9.''
\answer |Mr.@\& Mrs.@User
were married by Rev.@Drofnats, who preached on
Matt.@19\thinspace:\thinspace3--9.| \ (Such thin spaces are traditional
for ↑{Biblical references} to chapter and verse, but you weren't really
expected to know that. Plain \TeX\ defines ↑{:thinspace} to be a kern,
not glue; hence no break between lines will occur at a thinspace.)
\exercise Put the following bibliographic reference into plain \TeX\
language: Donald@E. ↑{Knuth}, ``Mathematical typography,'' {\sl Bull.\
Amer.\ Math.\ Soc.\ \bf1} (1979), 337--372.
\answer |Donald@E.\ Knuth, ``Mathematical typography,'' {\sl Bull.\
Amer.\ Math.\ Soc.\ \bf1} (1979), 337--372.| \ (But the `|\|' after `|E.|'
isn't necessary, because of a rule you will learn if you venture
around the next dangerous bend.)
On the other hand, if you don't care about such refinements of spacing
you can tell plain \TeX\ to make all spaces the same, regardless of
punctuation marks, by simply typing `↑{:frenchspace}' at the beginning
of your manuscript. French spacing looks like this:
$$\frenchspace
\displaybox{``Oh, oh!'' cried Baby Sally. Dick and Jane laughed.}$$
You can also shift back and forth between the two styles, either by saying
`↑{:nonfrenchspace}' to establish ↑{sophisticated spacing}, or by making
your use of\/ |\frenchspace| local to some group. For example, you might
want to use French spacing only when typing the bibliography of some document.
\danger \TeX\ doesn't consider a period to be the end of a sentence if the
preceding character is an upper-case letter, since \TeX\ assumes that
such upper-case letters are most likely somebody's initials.
Thus, for example, the `|\|' is unnecessary after the@`|I.|' in
`|Dr.@Livingstone@I.\ Presume|'; that particular period is not assumed to be
a full stop. ↑(Presume)
\dangerexercise What can you do to make \TeX\ recognize the ends of sentences
that do end with upper-case letters (e.g., `$\ldots$ launched by NASA.\null' or
`$\ldots$ see Appendix@A.')?
\answer There are several ways; perhaps the easiest are to type
`|\hbox{NASA}.|'\ or `|NASA\null.|' \ (The ↑{:null} macro is an abbreviation
for `|\hbox{}|'.)
\danger You can see the glue that \TeX\ puts between words by looking at
the contents of hboxes in the internal ↑{diagnostic format} that we discussed
↑(internal box-and-glue representation)
briefly in Chapter@11. For example, the line above begins as follows, after
\TeX\ has digested it and put it into a box, assuming |\nonfrenchspace|:
\begintt
.\tenrm \ (ligature ``)
.\tenrm O
.\tenrm h
.\tenrm ,
.\glue 3.33333 plus 2.08331 minus 0.88887
.\tenrm o
.\tenrm h
.\tenrm !
.\tenrm " (ligature '')
.\glue 4.44443 plus 4.99997 minus 0.37036
.\tenrm c
.\tenrm r
.\tenrm i
.\tenrm e
.\tenrm d
.\glue 3.33333 plus 1.66666 minus 1.1111
.\tenrm B
.\tenrm a
.\tenrm b
.\kern-0.27779
.\tenrm y
.\glue 3.33333 plus 1.66666 minus 1.1111
.\tenrm S
.\tenrm a
.\tenrm l
.\tenrm l
.\tenrm y
.\kern-0.83334
.\tenrm .
.\glue 4.44443 plus 4.99997 minus 0.37036
\endtt
The normal ↑{interword glue} in font |\tenrm| is $3.33333\pt$, plus
$1.66666\pt$ of stretchability, minus $1.1111\pt$ of shrinkability.
Notice that the interword |\glue| in this list stretches more, and shrinks
less, after the punctuation marks; and the natural space is in fact larger
at the end of each sentence. This example also shows several other things
that \TeX\ does while it processes the sample line of text: It converts
|``| and |''| into single characters, i.e., ↑{ligatures}; and it inserts
small ↑{kerns} in two places to improve the spacing. A ↑{*kern} is similar
to glue, but it is not the same, because kerns cannot stretch or shrink;
furthermore, \TeX\ will never break a line at a kern, unless that kern is
immediately followed by glue.
\ddanger You may be wondering what \TeX's rules for interword glue really
are, exactly. For example, how did \TeX\ remember the effect of Baby Sally's
exclamation point, when quotation marks intervened before the next space?
The details are slightly tricky, but not incomprehensible. When \TeX\
is processing a horizontal list of boxes and glue, it keeps track of a positive
integer called the current ``↑{space factor}.'' The space factor is normally
1000, which means that the interword glue should not be modified. If the
space factor $f$ is different from 1000, the interword glue is computed as
follows: Take the normal space glue for the current font, and add the
extra space if $f\ge2000$. \ (Each font specifies a normal space, normal
stretch, normal shrink, and extra space; for example, these quantities are
$3.33333\pt$, $1.66666\pt$, $1.1111\pt$, and $1.1111\pt$, respectively, in
↑{cmr10}. We'll discuss such `↑{*texinfo}' in greater detail later.) \
Then the stretch component is multiplied by $f/1000$, while the shrink
component is multiplied by $1000/f$.
\ddanger However, \TeX\ has two parameters ↑{*spaceskip} and ↑{*xspaceskip}
that allow you to override the normal spacing of the current font. If
$f\ge2000$ and if\/ |\xspaceskip| is nonzero, the |\xspaceskip| glue is
used for an ↑{interword space}. Otherwise if\/ |\spaceskip| is nonzero,
the |\spaceskip| glue is used, with stretch and shrink components
multiplied by $f/1000$ and $1000/f$. For example, the ↑{:raggedright}
macro of plain \TeX\ uses |\spaceskip| and |\xspaceskip| to suppress all
stretching and shrinking of interword spaces.
\ddanger The space factor $f$ is 1000 at the beginning of a horizontal list,
and it is set to 1000 just after a non-character box or a math formula
has been put onto the current horizontal list. You can say
`↑{*spacefactor}\<number>' to assign any particular value to the space factor;
but ordinarily, $f$ gets set to a number other than 1000 only when a simple
character box goes on the list. Each character has a ↑{space factor code},
and when a character whose space factor code is $g$ enters the current
list the normal procedure is simply to assign $g$ as the new space factor.
However, if $g$ is zero, $f$ is not changed; and if $f<1000<g$, the
space factor is set to@1000. \ (In other words, $f$ doesn't jump from a
value less than@1000 to a value greater than@1000 in a single step.)
\ddanger When ↑{.INITEX} creates a brand new \TeX, all characters have
a space factor code of@1000, except that the upper-case letters `|A|'
through@`|Z|' have code@999. \ (This slight difference is what makes
punctuation act differently after an upper-case letter; do you see why?)
\ Plain \TeX\ redefines a few of these codes using the ↑{*sfcode}
primitive, which is similar to |\catcode| (see Appendix@B\null); for
example, the instructions
\begintt
\sfcode`)=0 \sfcode`.=3000
\endtt
make right parentheses ``transparent'' to the space factor, while tripling
the stretchability after periods. The |\frenchspace| operation resets
|\sfcode`.| to 1000.
\ddanger When ligatures are formed, or when a special character is
specified via ↑{*char}, the space factor code is taken from the final
character actually appended to the horizontal list, not from the
individual characters that generated the ligature itself. For example,
Appendix@B sets |\sfcode`"=0|, since the characters |''| combine to form a
ligature that is in character position \oct{42}, which is |`"| in ascii.
This double-right-quote is supposed to have a space factor code of zero,
so that the effects of punctuation will be propagated. A character whose
character code is 128@or@more is required to have a space factor code
of@1000, since \TeX\ maintains a changeable |\sfcode| only for
characters@0@to@127.
\ddangerexercise What are the space factors after each token
of the Dick-and-Jane example?
\answer 1000, except: 999 after |B|, |S|, |D|, and |J|;
1250 after the comma; 3000 after the exclamation point, the double-right-quote,
and the periods.
\danger Here's the way \TeX\ goes about ↑{setting the glue} when an hbox
is being wrapped up: The natural width, $x$, of the box contents is
determined by adding up the widths of the boxes and kerns inside, together
with the natural widths of all the glue inside. Furthermore the total
amount of glue stretchability and shrinkability in the box is computed;
let's say that there's a total of $y_0+y_1\,{\rm fil}+y_2\,{\rm
fill}+y_3\,{\rm filll}$ available for stretching and $z_0+z_1\,{\rm
fil}+z_2\,{\rm fill}+z_3\,{\rm filll}$ available for shrinking. Now the
natural width@$x$ is compared to the desired width@$w$. If $x=w$,
all glue gets its natural width.
Otherwise the glue will be modified, by computing a ``↑{glue set
ratio}''@$r$ and a ``↑{glue set order}''@$o$ in the following way: \
(a)@If $x<w$, \TeX\ attempts to stretch the contents of the box; the glue
order is the highest subscript@$o$ such that $y_o$ is nonzero, and the
glue ratio is $r=(w-x)/y_o$. (If $y_0=y_1=y_2=y_3=0$, there's no
stretchability; both $o$ and $r$ are set to zero.) \ (b)@If $x>w$, \TeX\
attempts to shrink the contents of the box in a similar way; the glue
order is the highest subscript@$o$ such that $z_o\ne0$, and the glue ratio
is normally $r=(x-w)/z_o$. However, $r$ is set to 1.0 in the case $o=0$
and $x-w>z_0$, because the maximum shrinkability must not be exceeded. \
(c)@Finally, every glob of glue in the horizontal list being boxed is
modified. Suppose the glue has natural width@$u$, stretchability@$y$, and
shrinkability@$z$, where $y$@is a $j$th order infinity and $z$@is a $k$th
order infinity. Then if $x<w$ (stretching), this glue takes the new width
$u+ry$ if $j=o$; it keeps its natural width@$u$ if $j\ne o$. If $x>w$
(shrinking), this glue takes the new width $u-rz$ if $k=o$; it keeps
its natural width@$u$ if $k\ne o$. Notice that stretching or shrinking
occurs only when the glue has the highest order of infinity that doesn't
cancel out.
\danger \TeX\ will construct an hbox that has a given width $w$ if you issue
the command `\hbox{|\hbox to |\<dimen>|{|\<contents of box>|}|}', where
$w$ is the value of the \<dimen>. For example, the ↑{:line} macro discussed
earlier in this chapter is simply an abbreviation for `|\hbox to\the\hsize|'.
↑(.to)↑(*hbox)
\TeX\ also allows you to specify the exact amount of stretching or shrinking;
the command `\hbox{|\hbox expand|\<dimen>|{|\<contents of box>|}|}'
creates a box whose width@$w$ is a given amount more ↑(.expand)
than the natural width of the contents. For example, one of the boxes
displayed earlier in this chapter was generated by
\begintt
\hbox expand 5pt{``Oh, oh!'' ... laughed.}
\endtt
In the simplest case, when you just want a box to have its natural width,
you don't have to write `|\hbox expand 0pt|'; you can simply say
`|\hbox{|\<contents of box>|}|'.
\danger The ↑{baseline} of a constructed hbox is the common baseline of the
boxes inside. \ (More precisely, it's the common baseline that they would
share if they weren't raised or lowered.) \ The height and depth of a
constructed hbox are determined by the maximum distances by which the
interior boxes reach above and below the baseline, respectively. The
result of\/ |\hbox| never has negative height or negative depth, but the
width can be negative.
\dangerexercise Assume that
|\box1| is $1\pt$@high, $1\pt$@deep, and $1\pt$@wide;
|\box2| is $2\pt$@high, $2\pt$@deep, and $2\pt$@wide.
A third box is formed by saying ↑(*setbox)
\begintt
\setbox3=\hbox to3pt{\hfil\lower3pt\box1\hskip-3pt plus3fil\box2}
\endtt
What are the height, depth, and width of\/ |\box3|? Describe the position
of the reference points of boxes 1 and@2 with respect to the reference
point of box@3.
\answer |\box3| is $2\pt$ high, $4\pt$ deep, $3\pt$ wide.
Starting at the reference point of\/ |\box3|, go right $.75\pt$ and down
$3\pt$ to reach the reference point of\/ |\box1|; or go right $1\pt$
to reach the reference point of\/ |\box2|.
\danger The process of setting glue for vboxes is similar to that for
hboxes; but before we study the |\vbox| operation, we need to discuss how
\TeX\ stacks boxes up vertically so that their baselines tend to be
a fixed distance apart. The boxes in a horizontal list often touch each
other, but it's usually wrong to do this in a vertical list; imagine
how awful a page would look if its lines of type were brought closer
together whenever they didn't contain tall letters, or whenever they
didn't contain any letters that descended below the baseline.
\danger \TeX's solution to this problem involves three primitives called
↑{*baselineskip}, ↑{*lineskip}, and ↑{*lineskiplimit}. A format designer
chooses values of these three quantities by writing
$$\displayvbox{\halign{#\hfil\cr
|\baselineskip=|\<glue>\cr
|\lineskip=|\<glue>\cr
|\lineskiplimit=|\<dimen>\cr}}$$
and the interpretation is essentially this: Whenever a box is added to a
vertical list, \TeX\ inserts ``↑{interline glue}'' intended to make
the distance between the baseline of the new box and the baseline of
the previous box exactly equal to the value of\/ |\baselineskip|.
But if the interline glue calculated by this rule would cause the
top edge of the new box to be closer than |\lineskiplimit| to the
bottom edge of the previous box, then |\lineskip| is used as the
interline glue. In other words, the distance between adjacent baselines
will be the |\baselineskip| setting, unless that would bring the boxes
too close together; the |\lineskip| glue will separate adjacent boxes
in the latter case.
\danger The rules for interline glue in the previous paragraph are
carried out without regard to other kinds of glue that might be present;
all vertical spacing due to explicit appearances of\/ |\vskip| and |\kern|
acts independently of the interline glue. Thus, for example, a
↑{:smallskip} between two lines always makes their baselines further apart
than usual, by the amount of a |\smallskip|; it does not
affect the decision about whether |\lineskip| glue is used between
those lines.
\danger For example, let's suppose that |\baselineskip=12pt plus 2pt|,
|\lineskip=|\penalty0|3pt minus 1pt|, and |\lineskiplimit=2pt|. \ (These values
aren't particularly useful; they have simply been chosen to illustrate
the rules.) \ Suppose further that a box whose depth is $3\pt$
was most recently added to the current vertical list; we are about to
add a new box whose height is@$h$. If $h=5\pt$, the interline glue
will be $4\pt$@plus@$2\pt$, since this will make the baselines
$12\pt$@plus@$2\pt$ apart when we add $h$ and the previous depth to
the interline glue. But if $h=8\pt$, the interline
glue will be $3\pt$@minus@$1\pt$, since |\lineskip| will be
chosen in order to keep from violating the given |\lineskiplimit|.
\danger When you are typesetting a document that spans several pages,
it's generally best to define the |\baselineskip| so that it cannot stretch
or shrink, because this will give more uniformity to the pages. A small
variation in the distance between baselines---say only half a point---can
make a substantial difference in the appearance of the type, since it
significantly affects the proportion of white to black. On the other hand,
if you are preparing a one-page document, you might want to give the
baselineskip some stretchability, so that \TeX\ will help you fit the copy
on the page.
\dangerexercise What settings of\/ |\baselineskip|, |\lineskip|, and
|\lineskiplimit| will cause the interline glue to be a ``continuous''
function of the next box height (i.e., the interline glue will never
change a lot when the box height changes only a little)?
\answer The stretch and shrink components of\/ |\baselineskip| and
|\lineskip| should be equal, and the |\lineskiplimit| should
equal the normal |\lineskip| spacing, to guarantee continuity.
\danger A study of \TeX's ↑{internal box-and-glue representation} should
help to firm up some of these ideas. Here is an excerpt from the vertical
list that \TeX\ constructed when it was typesetting this very paragraph:
\begintt
\glue 6.0 plus 2.0 minus 2.0
\glue(\parskip) 0.0 plus 1.0
\glue(\baselineskip) 1.25
\hbox(7.5+1.94444)x312.0, glue set 0.79535, shifted 36.0 []
\penalty 10000
\glue(\baselineskip) 2.80556
\hbox(6.25+1.94444)x312.0, glue set 0.57829, shifted 36.0 []
\penalty 50
\glue(\baselineskip) 2.80556
\hbox(6.25+1.75)x348.0, glue set 116.60724fil []
\penalty 10000
\glue(\abovedisplayskip) 6.0 plus 3.0 minus 1.0
\glue(\lineskip) 1.0
\hbox(149.25+0.74998)x348.0 []
\endtt
{\showboxdepth0\showboxbreadth9999\batchmode\showlists\errorstopmode}%
% The log file now contains lines like these; I copied them
% into this MS! But I deleted an `insert' for the index...
The first |\glue| in this example is the ↑{:medskip} that precedes each
dangerous-bend paragraph. Then comes the ↑{*parskip} glue, which is
automatically supplied before the first line of a new paragraph. Then
comes some interline glue of $1.25\pt$; it was calculated to make
a total of $11\pt$ when the height of the next box ($7.5\pt$)
and the depth of the previous box were added. \ (The previous box is not
shown---it's the bottom line of exercise@\chapno.\the\count\exno---but
we can deduce that its depth was $2.25\pt$.) \ The |\hbox| that
follows is the first line of this paragraph; it has been shifted right
$36\pt$ because of ↑{hanging indentation}. The glue set ratio
for this hbox is 0.79535; i.e., the glue inside is stretched by 79.535\%\
of its stretchability. \ (In the case of shrinking, the ratio following
`↑{.glue set}' would have been preceded by `|- |'; hence we know that
stretching is involved here.) \ \TeX\ has put `|[]|' at the end of each
hbox line to indicate that there's something in the box that isn't shown.
\ (The box contents would have been displayed completely, if
↑{*showboxdepth} had been set higher.) \ The ↑{*penalty} indications are
used to discourage bad breaks between pages, as we will see later. The
third hbox has a glue ratio of 116.60724, which applies to
first-order-infinite stretching (i.e., fil); this results from an |\hfil|
that was implicitly inserted just before the displayed material, to fill
up the third line of the paragraph. Finally the big hbox whose height is
$149.25\pt$ causes |\lineskip| to be the interline glue. This large box
contains the individual lines of typewriter type that are displayed; they
have been packaged into a single box so that they cannot be split between
pages. Careful study of this example will teach you a lot about \TeX's
inner workings.
\danger Exception: No interline glue is inserted before or after a rule
box. You can also inhibit interline glue by saying ↑{:nointerlineskip}
between boxes.
\ddanger \TeX's implementation of interline glue involves another primitive
quantity called ↑{*prevdepth}, which usually contains the depth of the
most recent box on the current vertical list. However, |\prevdepth| is set
to the sentinel value $-1000\pt$ at the beginning of a vertical list, or
just after a rule box; this serves to suppress the next interline glue.
The user can change the value of\/ |\prevdepth| at any time when building
a vertical list; thus, for example, the |\nointerlineskip| macro of
Appendix@B simply expands to `|\prevdepth=-1000pt|'.
\ddanger Here are the exact rules by which \TeX\ calculates the interline
glue between boxes: Assume that a new box of height@$h$ (not a rule box)
is about to be appended to the bottom of the current vertical list, and
let $\hbox{|\prevdepth|}=p$, $\hbox{|\lineskiplimit|}=l$,
\hbox{$\hbox{|\baselineskip|}=(b$ plus@$y$ minus@$z)$}. If $p\le-1000\pt$,
no interline glue is added. Otherwise if $b-p-h\ge l$, the interline glue
`$(b-p-h)$ plus@$y$ minus@$z$' will be appended just above the new box.
Otherwise the |\lineskip| glue will be appended. Finally, |\prevdepth| is
set to the depth of the new box.
\ddangerexercise Mr.@B. L. ↑{User} had an application in which he wanted
to put a number of boxes together in a vertical list, with no space
between them. He didn't want to say |\nointerlineskip| after each box;
so he decided to set |\baselineskip|, |\lineskip|, and |\lineskiplimit|
all equal to |0pt|. Did this work?
\answer Yes it did, but only because none of his boxes had a negative
height or depth. He would have been safer if he had set
|\baselineskip=-1000pt|, |\lineskip=0pt|, and
|\lineskiplimit=16383pt|. \ (Plain \TeX's ↑{:offinterlineskip} macro does this.)
\danger The vertical analog of\/ |\hbox| is ↑{*vbox}, and \TeX\ will obey
the commands `|\vbox to|\<dimen>' and `|\vbox expand|\<dimen>' in about
the way you would expect, by analogy with the horizontal case. However,
there's a slight complication because boxes have both height and depth
in the vertical direction, while they have only width in the
horizontal direction. The dimension in a |\vbox| command refers to
the final height of the vbox, so that, for example, `|\vbox to 50pt{...}|'
produces a box that is $50\pt$ high; this is appropriate because everything
that can stretch or shrink inside a vbox appears in the part that
contributes to the height, while the depth is unaffected by glue setting.
\danger The depth of a constructed |\vbox| is best thought of as the depth
↑(depth of box) ↑(height of box)
of the bottom box inside. Thus, a vbox is conceptually built by taking
a bunch of boxes and arranging them so that their reference
points are lined up vertically; then the reference point of the lowest
box is taken as the reference point of the whole, and the glue is set
so that the final height has some desired value.
\danger However, this description of vboxes glosses over some
technicalities that come up when you consider unusual cases. For example,
\TeX\ allows you to shift boxes in a vertical list to the right or to
the left by saying
`↑{*moveright}\<dimen>\<box>' or `↑{*moveleft}\<dimen>\<box>'; this is
like the ability to ↑{*raise} or ↑{*lower} boxes in a horizontal list, and
it implies that the reference points inside a vbox need not always lie in
a vertical line. Furthermore, it is necessary to guard against boxes that
have too much depth, lest they extend too far into the bottom margin of a
page; and later chapters will point out that vertical lists can contain
other things like penalties and marks, in addition to boxes and glue.
\ddanger Therefore, the actual rules for the depth of a constructed vbox
are somewhat \TeX nical. Here they are: Given a vertical list that
is being wrapped up via |\vbox|, the problem is to determine its
natural depth. \ (1)@If the vertical list contains no boxes, the depth
is zero. \ (2)@If there's at least one box, but if the final box is
followed by kerning or glue, possibly with intervening penalties or
other things, the depth is zero. \ (3)@If there's at least
one box, and if the final box is not followed by kerning or glue, the
depth is the depth of that box. \ (4)@However, if the depth computed
by rules (1), (2), or@(3) exceeds ↑{*boxmaxdepth}, the depth will be
the current value of\/ |\boxmaxdepth|. \ (Plain \TeX\ sets |\boxmaxdepth|
to the largest possible dimension; therefore rule@(4) won't apply unless
you specify a smaller value. When rule@(4) does decrease the depth,
\TeX\ adds the excess depth to the box's natural height, somewhat as
if the bottom box has been pushed up until its depth is reduced to
the stated maximum.)
\danger The glue is set in a vbox just as in an hbox, by determining a
↑{glue set ratio} and a ↑{glue set order}, based on the difference between
the natural height@$x$ and the desired height@$w$, and based on the amount
of stretchability and shrinkability that happens to be present.
\danger The width of a computed |\vbox| is the maximum distance by which
an enclosed box extends to the right of the reference point, taking
possible shifting into account. This width is always nonnegative.
\dangerexercise Assume that |\box1| is $1\pt$ high, $1\pt$ deep, and
$1\pt$ wide; |\box2| is $2\pt$ high, $2\pt$ deep, and $2\pt$ wide; the
baselineskip, lineskip, and lineskiplimit are all zero; and the |\boxmaxdepth|
is very large. A third box is formed by saying
\begintt
\setbox3=\vbox to3pt{\moveright3pt\box1\vskip-3pt plus3fil\box2}
\endtt
What are the height, depth, and width of\/ |\box3|? Describe the position
of the reference points of boxes 1 and@2 with respect to the reference point
of box@3.
\answer The interline glue will be zero, and the natural height is
$1+1-3+2=1\pt$ (because the depth of |\box2| isn't included in the natural
height); so the glue will ultimately become |\vskip-1pt| when it's set.
Thus, |\box3| is $3\pt$ high, $2\pt$ deep, $4\pt$ wide. Its reference
point coincides with that of\/ |\box2|; to get to the reference point
of\/ |\box1| you go up $2\pt$ and right $3\pt$.
\ddangerexercise Under the assumptions of the previous exercise, but
with |\baselineskip=9pt minus3fil|, describe |\box4| after
\begintt
\setbox4=\vbox to4pt{\vss\box1\moveleft4pt\box2\vss}
\endtt
\answer The interline glue will be $6\pt$ minus $3\,{\rm fil}$; the final
depth will be zero, since |\box2| is followed by glue; the natural
height is $12\pt$; and the shrinkability is $5\,{\rm fil}$. So |\box4|
will be $4\pt$ high, $0\pt$ deep, $1\pt$ wide, and it will contain
five items: |\vskip|\penalty0\hbox{|-1.6pt|}, |\box1|, |\vskip1.2pt|,
|\moveleft4pt\box2|, |\vskip-1.6pt|. Starting at the reference point of
|\box4|, you get to the reference point of\/ |\box1| by going up $4.6\pt$,
or to the reference point of\/ |\box2| by going up $.4\pt$ and left $4\pt$.
\ (For example, you go up $4\pt$ to get to the upper left corner of
|\box4|; then down $-1.6\pt$, i.e., up $1.6\pt$, to get to the upper left
corner of\/ |\box1|; then down $1\pt$ to reach its reference point. This
problem is clearly academic, since it's rather ridiculous to include
infinite shrinkability in the baselineskip.)
\ddangerexercise Solve the previous problem but with |\boxmaxdepth=-4pt|.
\answer Now |\box4| will be $4\pt$ high, $-4\pt$ deep, $1\pt$ wide, and it
will contain |\vskip|\penalty0\hbox{|-2.4pt|}, |\box1|, |\vskip-1.2pt|,
|\moveleft4pt\box2|, |\vskip-2.4pt|. From the baseline of\/ |\box4|, go up
$5.4\pt$ to reach the baseline of\/ |\box1|, or $3.6\pt$ to reach the
baseline of\/ |\box2|.
\danger We have observed that |\vbox| combines a bunch of boxes into
a larger box that has the same baseline as the bottom box inside.
\TeX\ has another operation called ↑{*vtop}, which gives you a box like |\vbox|
but with the same baseline as the top box inside. For example,
\begintt
\hbox{Here are \vtop{\hbox{two lines}\hbox{of text.}}}
\endtt
produces
$$\displaybox{Here are \vtop{\hbox{two lines}\hbox{of text.}}}$$
\ddanger You can say `|\vtop to|\<dimen>' and `|\vtop expand|\<dimen>'
just as with |\vbox|, but you should realize what such a construction
means. \TeX\ implements |\vtop| as follows: \ (1)@First a@vertical box
is formed as if\/ |\vtop| had been |\vbox|, using all of the rules for
|\vbox| as given above. \ (2)@The final height@$x$ is defined to
be zero unless the very first item inside the new vbox is a box;
in the latter case, $x$ is the height of that box. \ (3)@Let $h$
and@$d$ be the height and depth of the vbox in step@(1). \TeX\ completes
the |\vtop| by moving the reference point up or down, if necessary, so
that the box has height@$x$ and depth $h+d-x$.
\ddangerexercise Describe the empty boxes that you get from
`|\vbox to|\<dimen>|{}|' and
`|\vtop to|\<dimen>|{}|'. What are their heights, depths, and widths?
\answer |\vbox to| $x$|{}| produces height $x$;
|\vtop to| $x$|{}| produces depth $x$; the other dimensions are zero.
\ (This holds even when $x$ is negative.)
\ddangerexercise Define a macro |\nullbox#1#2#3| that produces a box
whose height, depth, and width are given by the three parameters.
The box should contain nothing that will show up in print.
\answer There are several possibilities:
\begintt
\def\nullbox#1#2#3{\vbox to#1{\vss\hrule height-#2depth#2width#3}}
\endtt
works because the rule will be of zero thickness. Less tricky is
\begintt
\def\nullbox#1#2#3{\vbox to#1{\vss\vtop to#2{\vss\hbox to#3{}}}}
\endtt
Both of these are valid with negative height and/or depth, but they do
not produce negative width. If the width might be negative, but not the
height or depth, you can use, e.g.,
|\def\nullbox#1#2#3{\hbox to#3{\hss\raise#1\null\lower#2\null}}|.
It's impossible for |\hbox| or |\vbox| or |\vtop| to construct a box
whose height and width are both negative.
\danger The |\vbox| operation tends to produce boxes with large height and
small depth, while |\vtop| tends to produce small height and large depth.
If you're trying to make a vertical list out of big vboxes, however, you
may not be satisfied with either |\vbox| or |\vtop|; you might well wish
that a box had two reference points simultaneously, one for the top and
one for the bottom. If such a dual-reference-point scheme were in use, one
could define interline glue based on the distance between the lower
reference point of one box and the upper reference point of its successor
in a vertical list. But alas, \TeX\ gives you only one reference point per box.
\danger There's a way out of this dilemma, using an important idea
called a ``↑{strut}.'' Plain \TeX\ defines ↑{:strut} to be an invisible
box of width zero that extends just enough above and below the baseline
so that you would need no interline glue at all if every line contained
a strut. \ (Baselines are $12\pt$ apart in plain \TeX; it turns out that
|\strut| is a vertical rule, $8.5\pt$ high and $3.5\pt$ deep and
$0\pt$ wide.) \ If you contrive to put a strut on the top line and
another on the bottom line, inside your large vboxes, then it's possible to
obtain the correct spacing in a larger assembly by simply letting the
boxes butt together. For example, the ↑{:footnote} macro in Appendix@E
puts struts at the beginning and end of every footnote, so that the spacing
will be right when several footnotes occur together at the bottom
of some page. ↑(fitting boxes together)
\danger If you understand boxes and glue, you're ready to learn
the ↑{:rlap} and ↑{:llap} macros of plain \TeX; these names are abbreviations
for ``right ↑{overlap}'' and ``left overlap.'' Saying `|\rlap{|\<something>|}|'
is like typesetting \<something> and then backing up as if you hadn't
typeset anything. More precisely, `|\rlap{|\<something>|}|' creates a box
of width zero, with `\<something>' appearing just at the right of that box
(but not taking up any space). The |\llap| macro is similar, but it does
the ↑{backspacing} first; in other words, `|\llap{|\<something>|}|'
creates a box of width zero, with `\<something>' extending just to the
left of that box. Using typewriter type, for example, you can typeset
`{\tt\rlap/=}' by saying either `|\rlap/=|' or `|/\llap=|'. It's possible
to put text into the left margin using |\llap|, or into the right margin
using |\rlap|, because \TeX\ does not insist that the contents of a box
must be strictly confined within that box's boundaries. ↑(marginal notes)
\danger The interesting thing about |\rlap| and |\llap| is that they can be
done so simply with infinite glue. One way to define |\rlap| would be
\begintt
\def\rlap#1{{\setbox0=\hbox{#1}\copy0\hskip-1wd0}}
\endtt
but there's no need to do such a lengthy computation. The actual definition
in Appendix@B is much more elegant, namely,
\begintt
\def\rlap#1{\hbox to 0pt{#1\hss}}
\endtt
and it's worth pondering why this works. Suppose, for example, that you're
doing |\rlap{g}| where the letter `g' is $5\pt$ wide. Since |\rlap| makes
an hbox of width $0\pt$, the glue represented by ↑{*hss} must shrink by
$5\pt$. Well, that glue has $0\pt$ as its natural width, but it has
infinite shrinkability, so it can easily shrink to $-5\pt$; and
`|\hskip-5pt|' is exactly what |\rlap| wants in this case.
\dangerexercise Guess the definition of\/ |\llap|, without peeking at
Appendices A or@B.
\answer |\def\llap#1{\hbox to 0pt{\hss#1}}|
\dangerexercise (This is a sequel to exercise 12.\linexno,
but it's trickier.) \ Describe the result of
\begintt
\line{\hfil A puzzle.\hfilneg}
\endtt
\answer You get `A' at the extreme left and `puzzle.\null' at the extreme right,
because the space between words has the only stretchability that is finite;
the infinite stretchability cancels out. \ (In this case, \TeX's rule
about ↑{infinite glue} differs from what you would get in the limit if the
value of $1\,{\rm fil}$ were finite but getting larger and larger.
The true limiting behavior would stretch the text `A@puzzle.\null' in the
same way, but it would also move that text infinitely far away past the right
edge of the page.)
\endchapter
There was things which he stretched,
but mainly he told the truth.
\author MARK ↑{TWAIN}{↑(Clemens)}, {\sl Huckleberry Finn\/} (1884) % chap1
\bigskip
Every shape exists only in relation to the space around it;
$\ldots$ there is a `right' position for every shape on every occasion.
If we succeed in finding that position we have done our job.
\author JAN ↑{TSCHICHOLD}, {\sl Typographische Gestaltung\/} (1935)
\eject
\beginchapter Chapter 13. Modes
Just as people get into different moods, \TeX\ gets into different ``modes.'' \
(Except that \TeX\ is more predictable than people.) \ There are six ↑{modes}:
\medskip
\item\bull↑{Vertical mode}. [Building the main vertical list, from which
the pages of output are derived.]
\smallskip\item\bull
↑{Internal vertical mode}. [Building a vertical list for a vbox.]
\smallskip\item\bull
↑{Horizontal mode}. [Building a horizontal list for a paragraph.]
\smallskip\item\bull
↑{Restricted horizontal mode}. [Building a horizontal list for an hbox.]
\smallskip\item\bull
↑{Math mode}. [Building a mathematical formula to be placed in
a horizontal list.]
\smallskip\item\bull
↑{Displayed math mode}. [Building a mathematical formula to be placed
on a line by itself, temporarily interrupting the current paragraph.]
\medskip\noindent In simple situations, you don't need to be aware of what
mode \TeX\ is in, because the computer just does the right thing. But
when you get an error message that says `\thinspace|!|@|You| |can't| |do|
|such-and-such| |in| |restricted| |horizontal| |mode|\thinspace', a
knowledge of modes helps to explain why \TeX\ thinks you goofed.
Basically \TeX\ is in one of the vertical modes when it is preparing a list of
boxes and glue that will be placed vertically above and below one another on
the page; it's in one of the horizontal modes when it is preparing a list
of boxes and glue that will be strung out horizontally next to each other
with baselines aligned; and it's in one of the math modes when it is
reading a formula.
A play-by-play account of a typical \TeX\ job should make the mode idea clear:
At the beginning, \TeX\ is in vertical mode, ready to construct pages. If you
specify glue or a box when \TeX\ is in vertical mode, the glue or the box gets
placed on the current page below what has already been specified. For example,
the ↑{*vskip} instructions in the sample run we discussed in Chapter@6
contributed vertical glue to the page; and the ↑{*hrule} instructions
contributed horizontal rules at the top and bottom of the story. The
↑{:centerline} commands also produced boxes that were included in the main
vertical list; but those boxes required a bit more work than the rule boxes:
\TeX\ was in vertical mode when it encountered
`|\centerline{\bf A SHORT STORY}|', and it went temporarily into restricted
horizontal mode while processing the words `|A SHORT STORY|'; then
the digestive process returned to vertical mode, after setting the
glue in the@|\centerline|@box.
Continuing with the example of Chapter 6, \TeX\ switched into horizontal
mode as soon as it read the `|O|' of `|Once upon a time|'. Horizontal mode
is the mode for making ↑{paragraphs}. The entire paragraph (lines 7 to@11
of the |story| file) was input in horizontal mode; then the text was
divided into output lines of the appropriate width, those lines were
put in boxes and appended to the page (with appropriate interline glue between
them), and \TeX\ was back in vertical mode. The `|M|' on line@12 started up
horizontal mode again.
When \TeX\ is in vertical mode or internal vertical mode,
the first token of a new paragraph changes the mode to horizontal for the
duration of a paragraph. In other words, things that do not have a vertical
orientation cause the mode to switch automatically from vertical to
horizontal. This occurs when you type any character, or ↑{*char} or ↑{*accent}
or ↑{*hskip} or |\|\] ↑(escape space) or math shift (|$|); \TeX\ inserts
the current paragraph ↑{indentation} and rereads the horizontal token as
if it had occurred in horizontal mode.
\danger You can also tell \TeX\ explicitly to go into horizontal mode,
instead of relying on such implicit mode-switching, by saying `↑{*indent}'
or `↑{*noindent}'. For example, if line@7 of the |story| file in Chapter@6
had begun
\begintt
\indent Once upon a time, ...
\endtt
the same output would have been obtained, because `|\indent|' would have
instructed \TeX\ to begin the paragraph. And if that line had begun with
\begintt
\noindent Once upon a time, ...
\endtt
the first paragraph of the story would not have been indented. The
|\noindent| command simply tells \TeX\ to enter horizontal mode if the
current mode is vertical or internal vertical; |\indent| is similar,
but it also creates an empty box whose width is the current value of
↑{*parindent}, and it puts this empty box into the current horizontal list.
Plain \TeX\ sets |\parindent=20pt|.
If you say |\indent\indent|, you get double indentation; if you say
|\noindent\noindent|, the second |\noindent| does nothing.
\dangerexercise If you say `↑{*hbox}|{...}|' in horizontal mode, \TeX\ will
construct the specified box and it will contribute the result to the
current paragraph. Similarly, if you say `|\hbox{...}|' in vertical mode,
\TeX\ will construct a box and contribute it to the current page.
What can you do if you want to begin a paragraph with an |\hbox|?
\answer Simply saying |\hbox{...}| won't work, since that box will just
continue the previous vertical list without switching modes. You need
to start the paragraph explicitly, and the straightforward way to
do that is to say |\indent\hbox{...}|.
But suppose you want to define
a macro that expands to an hbox, where this macro is to be used in the
midst of a paragraph as well as at the beginning; then you don't want
to force users to type |\indent| before calling your macro at the
beginning of a paragraph, nor do you want to say |\indent| in the
macro itself (since that might insert unwanted indentations). One
solution to this more general problem is to say
`↑{:strut}|\hbox{...}|', since the occurrence of |\strut| in vertical
mode will begin a paragraph. Another solution is to say
`|\|\]↑{*unskip}|\hbox{...}|', since |\|\] makes the mode
horizontal while |\unskip| removes the unwanted space. Or you can say
`|\hskip0pt\hbox{...}|'. Yet another solution uses ↑{*ifvmode}.
When handling simple manuscripts, \TeX\ spends almost all of its time in
horizontal mode (making paragraphs), with brief excursions into vertical
mode (between paragraphs). A paragraph is completed when you type ↑{*par}
or when your manuscript has a blank line, since a blank line is converted
to |\par| by the reading rules of Chapter@8. A paragraph also ends when
you type certain things that are incompatible with horizontal mode.
For example, the command `|\vskip 1in|' on line@16 of Chapter@6's |story|
file was enough to terminate the paragraph about `|...beautiful
documents.|'; no |\par| was necessary, since |\vskip| introduced
vertical glue that couldn't belong to the@paragraph.
If a begin-math token (|$|) appears in
horizontal mode, \TeX\ plunges into math mode and processes the formula up until
the closing `|$|', then appends the text of this formula to the current
paragraph and returns to horizontal mode. Thus, in the ``I wonder why?''\
example of the previous chapter, \TeX\ went into math mode temporarily
while processing |\ldots|, treating the dots as a formula.
However, if two consecutive begin-math tokens appear in a paragraph (|$$|),
\TeX\ interrupts the paragraph where it is, contributes the paragraph-so-far
to the enclosing vertical list, then processes a math formula in display
math mode, then contributes this formula to the enclosing list, then
returns to horizontal mode for more of the paragraph. \ (The formula to be
displayed should end with `|$$|'.) \ For example, suppose you type
\begintt
the number $$\pi \approx 3.1415926536$$ is important.
\endtt
\TeX\ goes into display math mode between the |$$|'s, and the output you
get states that the number $$\pi \approx 3.1415926536$$ is important.
↑(:pi)
\smallskip
\TeX\ ignores blank spaces and blank lines (or ↑{*par} commands) when it's
in vertical or internal vertical mode, so you need not worry that
such things might change the mode or affect a printed document.
An ↑{escape space} (|\|\]) will, however, be regarded as the
beginning of a paragraph; the paragraph will start with a blank space
after the indentation.
\smallskip
At the end of a \TeX\ manuscript it's usually best to finish everything off
by typing `↑{:bye}', which is plain \TeX's abbreviation for
`|\vfill\eject\end|'. The `|\vfill|' gets \TeX\ into vertical
mode and inserts enough space to fill up the last page; `|\eject|' outputs
that last page; and `|\end|' sends the computer into its ↑{endgame} routine.
\danger \TeX\ gets into internal vertical mode when you ask it to construct
something from a vertical list of boxes (using |\vbox| or |\vtop| or |\vcenter|
or |\valign| or |\vadjust| or |\insert|). It gets into restricted
horizontal mode when you ask it to construct something from a horizontal list
of boxes (using |\hbox| or |\halign|). Box construction is discussed
in Chapters 12 and@21. We will see later that there is very little difference
between internal vertical mode and ordinary vertical mode; but restricted
horizontal mode forbids you from doing certain things that are legal in
ordinary horizontal mode. \ (See Chapter@25.)
\danger Whenever \TeX\ looks at a token of input to decide what should be
done next, the current mode has a potential influence on what that token
means. For example, ↑{*kern} specifies vertical spacing in vertical mode,
but it specifies horizontal spacing in horizontal mode; a math shift
character like `|$|' causes entry to math mode from horizontal mode, but
it causes exit from math mode when it occurs in math mode; two consecutive
math shifts (|$$|) appearing in horizontal mode will initiate displayed
math mode, but in restricted horizontal mode they simply denote an empty
math formula. \TeX\ uses the fact that some operations are inappropriate
in certain modes to help you recover from errors that might have crept
into your manuscript. Chapters 24 to@26 explain exactly what
happens to every possible token in every possible mode.
\danger \TeX\ often interrupts its work in one mode to do some task in
another mode, after which the original mode is resumed again.
For example, you can say `|\hbox{|' in any mode; when \TeX\ digests this,
it suspends whatever else it was doing and enters restricted horizontal
mode. The matching `|}|' will eventually cause the hbox to be completed,
whereupon the postponed task will be taken up anew. In this sense \TeX\ can
be in many modes simultaneously, but only the innermost
mode influences the calculations at any time; the other modes have
been pushed out of \TeX's consciousness.
\goodbreak
\danger One way to become familiar with \TeX's modes is to consider the
following curious test file called |modes.tex|, which exercises all the
modes at once:
$$\halign{\hbox to\the\parindent{\hfil\sevenrm#\ \ }\hfil\cr
1&|\tracingcommands=1|\cr\noalign{↑(*tracingcommands)}
2&|\hbox{|\cr
3&|$|\cr
4&|\vbox{|\cr
5&|\noindent$$|\cr
6&|x\showlists|\cr
7&|$$}$}\bye|\cr}$$
The first line of ↑{.modes.tex} tells \TeX\ to log every command it
receives; \TeX\ will produce diagnostic data whenever
|\tracingcommands| is positive. Indeed, if you run \TeX\ on |modes.tex|
you will get a |modes.log| file that includes the following information:
\begintt
{vertical mode: \hbox}
{restricted horizontal mode: blank space}
{math shift character $}
{math mode: blank space}
{\vbox}
{internal vertical mode: blank space}
{\noindent}
{horizontal mode: math shift character $}
{displayed math mode: blank space}
{the letter x}
\endtt
The meaning is that \TeX\ first saw an |\hbox| token in vertical mode;
this caused it to go ahead and read the `|{|' behind the scenes.
Then \TeX\ entered restricted horizontal mode, and
saw the blank space token that resulted from the end of line@2 in the
file. Then it saw a math shift character token (still in restricted
horizontal mode), which caused a shift to math mode; another
blank space came through. Then |\vbox| inaugurated internal
vertical mode, and |\noindent| instituted horizontal mode within that; two
subsequent |$| signs led to displayed math mode. \ (Only the first |$| was
shown by |\tracingcommands|, because that one caused \TeX\ to look ahead
for another.)
\danger The next thing in |modes.log| after the output above is
`|{|↑{*showlists}|}|'. This is another handy diagnostic command that
you can use to find out things that \TeX\ ordinarily keeps to itself; it
causes \TeX\ to display what lists it is working on in the current
mode and in all enclosing modes where the work has been suspended:
\begintt
### displayed math mode entered at line 5
\mathord
.\fam1 x
### internal vertical mode entered at line 4
prevdepth ignored
### math mode entered at line 3
### restricted horizontal mode entered at line 2
\glue 3.33333 plus 1.66666 minus 1.11111
spacefactor 1000
### vertical mode entered at line 0
prevdepth ignored
\endtt
In this case the lists represent five levels of activity, all
present at the end of line@6 of |modes.tex|.
The current mode is shown first, namely, displayed math mode, which began
on line@5. The current math list contains one ``↑{mathord}''
object, consisting of the letter@|x| in family@1. \ (Have patience and you
will understand what that means, when you learn about \TeX's math formulas.) \
Outside of displayed math mode comes internal vertical mode, to which \TeX\
will return when the paragraph containing the
displayed formula is complete. The vertical list
on that level is empty; `↑{.prevdepth ignored}' means that |\prevdepth|
has a value $\le-1000\pt$, so that the next interline glue will be omitted
(cf.@Chapter@12). The math mode outside of this internal vertical mode has
an empty list, likewise, but the restricted horizontal mode enclosing the math
mode contains some glue. Finally, we see the main vertical mode that
encloses everything; this mode was `|entered at line 0|', i.e., before
the file |modes.tex| was input; nothing has been contributed
so far to the vertical list on this outermost level.
\dangerexercise Why is there glue in one of these lists but not in the
others?
\answer The output of\/ |\tracingcommands| shows that four blank space tokens
were digested; these originated at the ends of lines 2,@3, 4, and@5. Only
the first had any effect, since blank spaces are ignored in math formulas
and in vertical modes.
\dangerexercise After this output of\/ |\showlists|, the |modes.log| file
contains further output from |\tracingcommands|. In fact, the next two
lines of that file are
\begintt
{math shift character $}
{horizontal mode: end-group character }}
\endtt
because the `|$$|' on line@7 finishes the displayed formula, and this
resumes horizontal mode for the paragraph that was interrupted.
What do you think are the next three lines of |modes.log|\thinspace?
\answer The |end-group character| finishes the paragraph and the |\vbox|,
and |\bye| stands for `|\vfill\eject\end|', so the next three commands are
\begintt
{math mode: math shift character $}
{restricted horizontal mode: end-group character }}
{vertical mode: \vfill}
\endtt
\dangerexercise Suppose \TeX\ has generated a document without ever
leaving vertical mode. What can you say about that document?
\answer It contains only mixtures of vertical glue and horizontal rules
whose reference points appear at the left of the page; there's no text.
\ddangerexercise Some of \TeX's modes cannot immediately enclose other modes;
for example, displayed math mode is never directly enclosed by horizontal
mode, even though displays occur within paragraphs, because an interrupted
paragraph-so-far of horizontal mode is always completed and
removed from \TeX's memory before the processing of a displayed formula
begins. Give a complete characterization of all pairs of consecutive
modes that can occur in the output of\/ |\showlists|.
\answer Vertical mode can occur only as the outermost mode; horizontal
mode and displayed math mode can occur only when immediately enclosed by
vertical or internal vertical mode; ordinary math mode cannot be
immediately enclosed by vertical or internal vertical mode; all other
cases are possible.
\endchapter
Every mode of life has its conveniences.
\author SAMUEL ↑{JOHNSON}, {\sl The Idler\/} (1758)
\bigskip
[Hindu musicians] have eighty-four modes,
of which thirty-six are in general use,
and each of which, it appears, has a peculiar expression,
and the power of moving some particular sentiment or affection.
\author MOUNTSTUART ↑{ELPHINSTONE}, {\sl History of India\/} (1841)
% III.vii.I.297
\eject
\beginchapter Chapter 14. How \TeX\ Breaks\\Paragraphs Into Lines
One of a typesetting system's chief duties is to take a long sequence of words
and to break it up into individual lines of the appropriate size.
For example, every paragraph of this manual has been broken into lines
that are 29@picas wide, but the author didn't have to worry about such
details when he composed the manuscript. \TeX\ chooses breakpoints
in an interesting way that considers each paragraph in its entirety; the
closing words of a paragraph can actually influence the appearance of the
first line. As a result, the spacing between words is as uniform as
possible, and the computer is able to reduce the number of times that
words must be hyphenated or formulas must be split between lines.
↑(H\&J, see hyphenation, line breaking, setting glue)
↑(justification, see setting glue, line breaking)
↑(quad left, see flush left)
↑(quad right, see flush right)
↑(quad middle, see :break)
The experiments of Chapter@6 have already illustrated the general ideas:
We discussed the notion of ``badness,'' and we ran into ``overfull'' and
``underfull'' boxes in difficult situations. We also observed that different
settings of \TeX's \hbox{|\tolerance|} parameter
will produce different effects; a higher tolerance
means that wider spaces are acceptable.
\TeX\ will find the absolutely best way to typeset any given paragraph,
according to its ideas of minimum badness. But such ``badness'' doesn't
account for everything, and if you rely entirely on
an automatic scheme you will occasionally encounter line breaks that are
not really the best on psychological grounds; this is inevitable, because
computers don't understand things the way people do (at least not yet).
Therefore you'll sometimes want to tell the machine that certain places
are not good breakpoints. Conversely, you will sometimes want to force a
break at a particular spot. \TeX\ provides a convenient way to avoid
psychologically bad breaks, so that you will be able to obtain results of
the finest quality by simply giving a few hints to the machine.
``↑{Ties}''---denoted by `|@|' in plain \TeX---are the key to
successful line breaking. ↑(auxiliary space, see tie) ↑(at sign)
Once you learn how to insert them, you will have graduated from the ranks
of ordinary \TeX nical typists to the select group of Distinguished
\TeX nicians. And it's really not difficult to train yourself to
insert occasional ties, almost without thinking, as you type a manuscript.
↑(line breaks, avoiding) ↑(breaks, avoiding bad)
When you type |@| it's the same as typing a space, except that \TeX\
won't break a line at this space. Furthermore, you shouldn't leave
any blanks next to the |@|, since they will count as additional spaces.
If you put |@| at the very end of a line in your input file, you'll get a
wider space than you want, because the \<return> that follows the |@|
produces an extra space.
We have already observed in Chapter@12 that it's generally a good idea to
type |@| after an abbreviation that does not come at the end of a sentence.
Ties also belong in several other places:
\smallskip
\item\bull In references to named parts of a document:
$$\halign{#\hfil&\hskip 80pt#\hfil\cr
|Chapter@12|&|Theorem@1.2|\cr
|Appendix@A|&|Table@\hbox{B-8}|\cr
|Figure@3|&|Lemmas 5 and@6|\cr}$$
\noindent(No |@| appears after `|Lemmas|' in the final example, since there's
no harm in having `5@and@6' at the beginning of a line. The use of\/ |\hbox|
is explained below.)
\smallbreak
\item\bull Between a person's forenames and between multiple surnames:
$$\halign{#\hfil&\hskip 40pt#\hfil\cr
|Donald@E. Knuth|&|Luis@I. Trabb@Pardo|\cr
|Bartel@Leendert van@der@Waerden|&|Charles@XII|\cr}$$
↑(Knuth) ↑(Trabb Pardo) ↑(van der Waerden) ↑(Charles XII)
\noindent
Note that it is sometimes better to hyphenate a name than to break it
between words; e.g., `Don-' and `ald@E.@Knuth' is more tolerable
than `Donald' and `E.@Knuth'. The previous rule can be regarded as a
special case of this one, since we may think of `Chapter@12' as a
compound name; another example is `|register@X|'. Sometimes a name is
so long that we dare not tie it all together, lest there be no way to
break the line: ↑(Vall\'ee Poussin)
\begintt
Charles Louis Xavier@Joseph de@la Vall\'ee@Poussin.
\endtt
\item\bull Between math symbols in apposition with nouns:
$$\halign{#\hfil\cr
|dimension@$d$ width@$w$ function@$f(x)$|\cr
|string@$s$ of length@$l$|\cr}$$
\noindent However, the last example should be compared with
\begintt
string@$s$ of length $l$@or more.
\endtt
\item\bull Between symbols in series:
$$\halign{#\hfil\cr
|1,@2, or@3|\cr
|$a$,@$b$, and@$c$.|\cr
|1,@2, \dots,@$n$.|\cr}$$
\item\bull When a symbol is a tightly-bound object of a preposition:
$$\halign{#\hfil\cr
|of@$x$|\cr
|from 0 to@1|\cr
|increase $z$ by@1|\cr
|in common with@$m$.|\cr}$$
\noindent This does not apply to compound objects:
\begintt
of $u$@and@$v$.
\endtt
\item\bull When mathematical phrases are rendered in words:
$$\halign{#\hfil&\hskip20pt#\hfil&\hskip20pt#\hfil\cr
|equals@$n$|&|less than@$\epsilon$|&|(given@$X$)|\cr
|mod@2|&|modulo@$p↑e$|&|for all large@$n$|\cr}$$
\noindent Compare `|is@15|' with `|is 15@times the height|'.
\medbreak
\item\bull When cases are being enumerated within a paragraph:
↑(enumerated cases within a paragraph)
$$\halign{#\hfil\cr
|(b)@Show that $f(x)$ is (1)@continuous; (2)@bounded.|\cr}$$
\noindent It would be nice to boil all of these rules down to one or two
simple principles, and it would be even nicer if the rules could be
automated so that keyboarding could be done without them; but subtle
semantic considerations seem to be involved. Therefore it's best to use
your own judgment with respect to ties. The computer needs your help.
A tie keeps \TeX\ from breaking at a space, but sometimes you want to
prevent the machine from breaking at a ↑{hyphen} or a ↑{dash}. This can be
done by using ↑{*hbox}, because \TeX\ will not split up the contents
of a box; boxes are indecomposable units, once they have been constructed.
We have already illustrated this principle in the `|Table@\hbox{B-8}|'
example considered earlier.
Another example occurs when you are typing the page numbers
in a ↑{bibliographic reference}: It doesn't look good to put \hbox{`22.'} on a
line by itself, so you can type `|\hbox{13--22}.|' to prohibit
breaking `\hbox{13--22}.' On the other hand, \TeX\ doesn't often choose
line breaks at hyphens, so you needn't bother to insert |\hbox| commands
unless you need to correct a bad break that \TeX\ has already made on a
previous run.
\exercise Here are some phrases culled from previous chapters of this
manual. How do you think the author typed them?
$$\halign{\indent#\hfil\cr
(cf.@Chapter@12).\cr
Chapters 12 and@21.\cr
line@16 of Chapter@6's {\tt story}\cr
lines 7 to@11\cr
lines 2,@3, 4, and@5.\cr
(2)@a big black bar\cr
All 128@characters are initially of category@12,\cr
letter@{\tt x} in family@1.\cr
the factor@$f$, where $n$@is 1000@times@$f$.\cr}$$
\answer{\obeylines%
|(cf.@Chapter@12).|
|Chapters 12 and@21.|
|line@16 of Chapter@6's {\tt story}|
|lines 7 to@11|
|lines 2,@3, 4, and@5.|
|(2)@a big black bar|
|All 128@characters are initially of category@12,|
|letter@{\tt x} in family@1.|
|the factor@$f$, where $n$@is 1000@times@$f$.|}
\exercise How would you type the phrase `for all $n$ greater than
$n_0$'\thinspace?
\answer `|for all $n$@greater than@$n_0$|' avoids distracting breaks.
\exercise And how would you type `exercise 4.3.2--15'\thinspace?
\answer `|exercise \hbox{4.3.2--15}|' guarantees that there is no break
after the ↑{en-dash}. But this precaution is rarely necessary, so
`|exercise 4.3.2--15|' is an acceptable answer. No |@| is needed;
`\hbox{4.3.2--15}' is so long that it causes no offense
at the beginning of a line.
\exercise Why is it better to type `|Chapter@12|' than to type
`|\hbox{Chapter 12}|'\thinspace?
\answer The space you get from |@| will stretch or shrink with the
other spaces in the same line, but the space inside an hbox has
a fixed width since that glue has already been set once and for all.
\dangerexercise \TeX\ will sometimes break a math formula after an
equals sign. How can you stop the computer from breaking the formula
`$x=0$'\thinspace?
\answer `|\hbox{$x=0$}|' is unbreakable, and we will see later that
`|${x=0}$|' cannot be broken. Both of these solutions set the glue
surrounding the equals sign to some fixed value, but such glue normally
wants to stretch; furthermore, the |\hbox| solution might include undesirable
blank space at the beginning or end of a line, if\/ ↑{*mathsurround} is
nonzero. A third solution `|$x=\nobreak0$|' avoids both defects.
\ddangerexercise Explain how you could instruct \TeX\ not to make any
breaks after explicit hyphens and dashes. \ (This is useful in
lengthy ↑{bibliographies}.)
\answer |\exhyphenpenalty=10000| prohibits all such breaks, according
to the rules found later in this chapter. Similarly, |\hyphenpenalty=10000|
prevents breaks after implicit (discretionary) hyphens.
Sometimes you want to permit a line break after a `/' just as if it were
a hyphen. For this purpose plain \TeX\ allows you to say `↑{:slash}';
for example, `|input\slash output|' produces `input\slash output' with
an optional break.
{\hbadness=10000
If you want to force \TeX\ to break between lines at a certain point in
↑(line breaks, forcing) ↑(breaks, forcing good)
the middle of a paragraph, just say `↑{:break}'. However, that might cause the
line to be really spaced out.\break
If you want \TeX\ to fill up the right-hand part of a line
with blank space just before a forced line break,\hfil\break
without indenting the next line, say `|\hfil\break|'.\par}
\danger You may have several consecutive lines of input
for which you want the output to appear line-for-line in the same way.
One solution is to type `|\par|' at the end of each input line; but that's
somewhat of a nuisance, so plain \TeX\ provides the abbreviation
`↑{:obeylines}', which causes each end-of-line in the input to be
like |\par|. After you say |\obeylines| you will get one line of
output per line of input, unless the input lines end with `|%|' or
unless they are so long that they must be broken. For example, you
probably want to use |\obeylines| if you are typesetting a ↑{poem}
or a computer ↑{program}.
Be sure to enclose |\obeylines| in a group, unless you want this
``poetry mode'' to continue to the end of your document.
\begintt
{\obeylines\smallskip
Roses are red,
\quad Violets are blue;
Rhymes can be typeset
\quad With boxes and glue.
\smallskip}
\endtt
\dangerexercise Explain the uses of\/ ↑{:quad} in this poem. What would
have happened if `|\quad|' had been replaced by `↑{*indent}' in both places?
\answer The second and fourth lines are indented by an additional ``quad''
of space, i.e., by one extra em in the current type style.
\ (The control sequence |\quad| does an ↑{*hskip}; when \TeX\ is in
vertical mode, |\hskip| begins a new paragraph and puts glue after the
indentation.) \ If\/ |\indent| had been used instead, those lines wouldn't
have been indented any more than the first and third, because |\indent| is
implicit at the beginning of every paragraph. Double indentation on the
second and fourth lines could have been achieved by `|\indent\indent|'.
Roughly speaking, \TeX\ breaks paragraphs into lines in the following way:
Breakpoints are inserted between words or after hyphens so as to produce lines
whose badnesses do not exceed the current ↑{*tolerance}. If there's no
way to insert such breakpoints, an overfull box is set. Otherwise the
breakpoints are chosen so that the paragraph is mathematically optimal, in
the sense that it has no more ``↑{demerits}'' than you could obtain by any
other sequence of breakpoints. Demerits are based on the badnesses of
individual lines and on the existence of such things as consecutive lines
that end with hyphens, or tight lines that occur next to loose ones.
\danger But the informal description of line breaking in the previous
paragraph is an oversimplification of what really happens. The remainder
of this chapter explains the details precisely, for people who want to
apply \TeX\ in nonstandard ways. \TeX's line-breaking algorithm
has proved to be general enough to handle a surprising variety of
different applications; this, in fact, is probably the most interesting
aspect of the whole \TeX\ system. However, every paragraph from now on
until the end of the chapter is prefaced by at least one dangerous bend
sign, so you may want to learn the following material in easy stages
instead of all at once.
\ninepoint
\danger Before the lines have been broken, a paragraph inside of \TeX\
is actually a {\sl ↑{horizontal list}}, i.e., a sequence of items that
\TeX\ has gathered while in horizontal mode. We have been saying
informally that a horizontal list consists of boxes and glue; the truth
is that boxes and glue aren't the whole story. Each item in a horizontal
list is one of the following types of things:\enddanger
\smallskip
\item\bull a box (a character or ligature or rule or hbox or vbox);
\item\bull a ↑{discretionary break} (to be explained momentarily);
↑(break, discretionary)
\item\bull a ``↑{whatsit}'' (something special to be explained later);
\item\bull vertical material (from ↑{*mark} or ↑{*vadjust} or ↑{*insert});
\item\bull a glob of ↑{glue} (or ↑{*leaders}, as we will see later);
\item\bull a ↑{kern} (something like glue that doesn't stretch or shrink);
\item\bull a ↑{penalty} (representing the undesirability of breaking here);
\item\bull ``↑{math-on}'' (beginning a formula) or ``↑{math-off}'' (ending a
formula).
\smallskip\noindent
The last four types (glue, kern, penalty, and math items)
are called {\sl ↑{discardable}}, since they
may change or disappear at a line break; the first four types are
called non-discardable, since they always remain intact. Many of the
things that can appear in horizontal lists have not been touched on yet
in this manual, but it isn't necessary to understand them in order to
understand line breaking. Sooner or later you'll learn how each of the
gismos listed above can infiltrate a horizontal list; and if you want to
get a thorough understanding of \TeX's internal processes, you can always
use ↑{*showlists} with various features of the language, in
order to see exactly what \TeX\ is doing.
\danger A discretionary break consists of three sequences of characters
called the {\sl pre-break}, {\sl post-break}, and {\sl no-break\/}
texts. The idea is that if a line break occurs here, the ↑{pre-break text}
will appear at the end of the current line and the ↑{post-break text} will
occur at the beginning of the next line; but if no break occurs, the
↑{no-break text} will appear in the current line. Users can specify
↑(*discretionary)
discretionary breaks in complete generality by writing
$$\displaybox{|\discretionary{|\<pre-break text>|}{|\<post-break
text>|}{|\<no-break text>|}|}$$
where the three texts consist entirely of characters and kerns.
For example, \TeX\ can hyphenate the word
`difficult' between the f's, even though this requires breaking the
`ffi' ligature into `f-' followed by an `fi' ligature, if the horizontal
list contains
\begintt
di\discretionary{f-}{fi}{ffi}cult.
\endtt
Fortunately you need not type such a mess yourself; \TeX's hyphenation algorithm
works behind the scenes, taking ↑{ligatures} apart and putting them
into discretionary breaks when necessary.
\danger The most common case of a discretionary break is a simple
discretionary hyphen
\begintt
\discretionary{-}{}{}
\endtt
for which \TeX\ accepts the abbreviation `↑{*-}'. The next most common case is
\begintt
\discretionary{}{}{}
\endtt
(an ``↑{empty discretionary}''), which \TeX\ automatically inserts after
`|-|' and after every ligature that ends with `|-|'. In the case of plain
\TeX, empty discretionaries are therefore inserted after ↑{hyphens} and
↑{dashes}.
\danger When \TeX\ ↑{hyphenates} words, it simply inserts discretionary
breaks into the horizontal list. For example, the words `|discretionary
hyphens|' are transformed into the equivalent of
\begintt
dis\-cre\-tionary hy\-phens
\endtt
if hyphenation becomes necessary. But \TeX\ refrains from hyphenating
any word that already contains a discretionary break, so you can use
explicit discretionaries to override \TeX's automatic method, in an
emergency.
\dangerexercise Some compound words in ↑{German} text change their spelling
when they are split between lines. For example, `backen' becomes `bak-ken'
and `Bettuch' becomes `Bett-tuch'. How can you instruct \TeX\ to produce
this effect?
\answer |ba\ck en| and |Be\ttt uch|, where the macros |\ck| and |\ttt|
are defined by
\begintt
\def\ck{\discretionary{k-}{k}{ck}}
\def\ttt{tt\discretionary{-}{t}{}}
\endtt
\TeX's hyphenation algorithm will not make such spelling changes automatically.
\danger In order to save time, \TeX\ tries first to break a paragraph
into lines without inserting any discretionary hyphens. This first pass
will succeed if a sequence of breakpoints is found for which none
of the resulting lines has a badness exceeding the current value of
↑{*pretolerance}. If the first pass fails, the method of Appendix@H is
used to hyphenate each word of the paragraph by inserting discretionary
breaks into the horizontal list, and a second attempt is
made using ↑{*tolerance} instead of\/ |\pretolerance|. When the lines
are fairly wide, as they are in this manual, experiments show that
the first pass succeeds more than 90\% of the time, and that fewer than
2@words per paragraph need to be subjected to the hyphenation algorithm,
on the average. But when the lines are very narrow the
first pass usually fails rather quickly. Plain \TeX\ sets |\pretolerance=100|
and |\tolerance=200| as the default values. If you make |\pretolerance=10000|,
the first pass will essentially always succeed, so hyphenations will not
be tried (and the spacing may not be very good); on the other hand if you make
|\pretolerance=-1|, \TeX\ will omit the first pass and will try to
hyphenate immediately.
\danger Line breaks can occur only in certain places within a horizontal
list. Roughly speaking, they occur between words and after hyphens, but in
actuality they are permitted in the following five cases:\enddanger
\smallskip
\item{a)} at glue, provided that this glue is immediately preceded by a
non-discardable item, and that it is not part of a math formula (i.e., not
between math-on and math-off).
\smallskip
\item{b)} at a kern, provided that this kern is immediately followed by
glue, and that it is not part of a math formula.
\smallskip
\item{c)} at a math-off that is immediately followed by glue.
\smallskip
\item{d)} at a penalty (which might have been inserted automatically in a
formula).
\smallskip
\item{e)} at a discretionary break.
\smallskip\noindent
Notice that if two globs of glue occur next to each other, the second one
will never be selected as a breakpoint, since it is preceded by glue (which
is discardable).
\danger Each potential breakpoint has an associated ``penalty,'' which
represents the ``aesthetic cost'' of breaking at that place. In cases
(a), (b),@(c), the penalty is zero; in case@(d) an explicit penalty
has been specified; and in case@(e) the penalty is the current value of
↑{*hyphenpenalty} if the pre-break text is nonempty, or the current value of
↑{*exhyphenpenalty} if the pre-break text is empty.
Plain \TeX\ sets |\hyphenpenalty=50| and |\exhyphenpenalty=50|.
\danger For example, if you say `↑{*penalty} |100|' at some point in a
paragraph, that position will be a legitimate place to break between
lines, but a penalty of 100 will be charged. If you say `\hbox{|\penalty-100|}'
you are telling \TeX\ that this is a rather good place to break, because
a negative penalty is really a ``↑{bonus}''; a line that ends with a bonus
might even have ``merits'' (negative demerits).
\danger Any penalty that is 10000 or more is considered to be so large
↑(infinite penalty) that \TeX\ will never break there. At the other
extreme, any penalty that is $-10000$ or less is considered to be so small
that \TeX\ will always break there. The ↑{:nobreak} macro of plain \TeX\
is simply an abbreviation for `|\penalty10000|', because this prohibits a
line break. A tie in plain \TeX\ is equivalent to `|\nobreak\|\]';
there will be no break at the glue represented by |\|\] in this
case, because glue is never a legal breakpoint when it is preceded by a
discardable item like a penalty.
\dangerexercise Guess how the ↑{:break} macro is defined in plain \TeX.
\answer |\def\break{\penalty-10000 }|
\dangerexercise What happens if you say |\nobreak\break| or
|\break\nobreak|?
\answer You get a forced break as if\/ |\nobreak| were not present, because
|\break| cannot be cancelled by another penalty. In general if you
have two penalties in a row, their combined effect is the same as a single
penalty whose value is the minimum of the two original values, unless
both of those values force breaks. \ (You get two breaks from
|\break\break|; the second one creates an empty line.)
\danger When a line break actually does occur, \TeX\ removes all discardable
items that follow the break, until coming to something non-discardable,
or until coming to another chosen breakpoint. For example, a sequence of
glue and penalty items will vanish as a unit, if no boxes intervene,
unless the optimum breakpoint sequence includes one or more of the penalties.
Math-on and math-off items act essentially as kerns that contribute the spacing
specified by ↑{*mathsurround}; such spacing will disappear into the line
break if a formula comes at the very end or the very beginning of a line,
because of the way the rules have been formulated above.
\ddanger The ↑{badness} of a line is an integer that is approximately 100
times the cube of the ratio by which the glue inside the line must stretch
or shrink to make an hbox of the required size. For example, if the line
has a total shrinkability of 10 points, and if the glue is being
compressed by a total of 9 points, the badness is computed to be@73 (since
$100\times(9/10)↑3=72.9$); similarly, a line that stretches by twice its
total stretchability has a badness of 800. But if the badness obtained by
this method turns out to be more than 10000, the value 10000 is used. \
$\bigl($See the discussion of ``↑{glue set ratio}''@$r$ and ``↑{glue set
order}''@$o$ in Chapter@12; if $o≠0$, there is infinite stretchability or
shrinkability, so the badness is zero, otherwise the badness is
approximately $\min(100r↑3,10000)$.$\bigr)$ \ Overfull boxes are
considered to be infinitely bad; they are avoided whenever possible.
↑(infinite badness)
\ddanger A line whose badness is 13 or more has a glue set ratio exceeding
50\%. We call such a line {\sl ↑{tight}\/} if its glue had to shrink,
{\sl ↑{loose}\/} if its glue had to stretch, and {\sl ↑{very loose}\/}
if it had to stretch so much that the badness is 100 or more. But if the
badness is 12 or less we say that the line is {\sl ↑{decent}}. Two
adjacent lines are said to be {\sl ↑{visually incompatible}\/} if their
classifications are not adjacent, i.e., if a tight line is next to a
loose or very loose line, or if a decent line is next to a very loose one.
\ddanger \TeX\ rates each potential sequence of breakpoints by totalling
up {\sl ↑{demerits}\/} that are assessed to individual lines. The goal
is to choose breakpoints that yield the fewest total demerits. Suppose that a
line has badness@$b$, and suppose that the penalty@$p$ is associated with
the breakpoint at the end of this line. As stated above, \TeX\ will not
even consider such a line if $p\ge10000$, or if $b$@exceeds the current
tolerance or pretolerance. Otherwise the demerits of such a line are
defined by the formula
$$\display{d=\cases{
(l+b)↑2+p↑2,&if $0\le p<10000$;\cr
(l+b)↑2-p↑2,&if $-10000<p<0$;\cr
(l+b)↑2,&if $p\le-10000$.\cr}}$$
Here $l$ is the current value of\/ ↑{*linepenalty}, a parameter that can be
increased if you want \TeX\ to try harder to keep all paragraphs to the
minimum number of lines; plain \TeX\ sets |\linepenalty=10|. For example,
a line with badness@20 ending at glue will have $(10+20)↑2=900$ demerits,
if $l=10$, since there's no penalty for a break at glue. Minimizing the
total demerits of a paragraph is roughly the same as minimizing the
sum of the squares of the badnesses and penalties; this usually means
that the maximum badness of any individual line is also minimized, over
all sequences of breakpoints.
\ddangerexercise The formula for demerits has a strange discontinuity: it
seems more reasonable at first to define $d=(l+b)↑2-10000↑2$, in the
case $p\le-10000$. Can you account for this apparent discrepancy?
\answer Breaks are forced when $p\le-10000$, so there's no point in
subtracting a large constant whose effect on the total demerits is
known {\sl a priori}, especially when that might cause arithmetic overflow.
\ddanger Additional demerits are assessed based on pairs of adjacent lines.
If two consecutive lines are visually incompatible, in the sense explained
above, the current value of\/ ↑{*adjdemerits} is added to@$d$. If two
consecutive lines end with discretionary breaks, the ↑{*doublehyphendemerits}
are added. And if the second-last line of the entire paragraph ends with
a discretionary, the ↑{*finalhyphendemerits} are added. Plain \TeX\ sets
up the values |\adjdemerits=10000|, |\doublehyphendemerits=10000|,
and \hbox{|\finalhyphendemerits=1000|}. Demerits are in units of
``badness squared,'' so the demerit-oriented parameters need to be rather
large if they are to have much effect; but tolerances and
penalties are given in the same units as badness.
\ddanger If you set ↑{*tracingparagraphs}|=1|, your log file will contain a
summary of \TeX's line-breaking calculations, so you can watch the tradeoffs
that occur when parameters like |\linepenalty| and |\hyphenpenalty| and
|\adjdemerits| are twiddled. The line-break data looks pretty scary at first,
but you can learn to read it with a little practice; this, in fact, is the
best way to get a solid understanding of line breaking. Here is the
trace that results from the second paragraph of the |story| file in
Chapter@6, when |\hsize=2.5in| and |\tolerance=1000|:
$$\halign{\indent#\hfil\cr
|[]\tenrm Mr. Drofnats---or ``R. J.,'' as he pre-|\cr
|@\discretionary via @@0 b=0 p=50 d=2600|\cr
|@@1: line 1.2- t=2600 -> @@0|\cr
|ferred to be called---was hap-pi-est when |\cr
|@ via @@1 b=127 p=0 d=28769|\cr
|@@2: line 2.0 t=31369 -> @@1|\cr
|he |\cr
|@ via @@1 b=26 p=0 d=1296|\cr
|@@3: line 2.3 t=3896 -> @@1|\cr
|was at work type-set-ting beau-ti-ful doc-|\cr
|@\discretionary via @@2 b=1 p=50 d=12621|\cr
|@\discretionary via @@3 b=289 p=50 d=101901|\cr
|@@4: line 3.2- t=43990 -> @@2|\cr
|u-|\cr
|@\discretionary via @@3 b=43 p=50 d=15309|\cr
|@@5: line 3.1- t=19205 -> @@3|\cr
|ments. |\cr
|@\par via @@4 b=0 p=-10000 d=1100|\cr
|@\par via @@5 b=0 p=-10000 d=1100|\cr
|@@6: line 4.2- t=20305 -> @@5|\cr
}$$
Lines that begin with `|@@|' ↑(atsign atsign) represent {\sl↑{feasible
breakpoints}}, i.e., breakpoints that can be reached without any badness
exceeding the tolerance. Feasible breakpoints are numbered consecutively,
starting with |@@1|; the beginning of the paragraph is considered to be
feasible too, and it is number |@@0|. Lines that begin with `|@|' but
not `|@@|' are candidate ways to reach the feasible breakpoint that
follows; \TeX\ will select only the best candidate, when there is a choice.
Lines that do not begin with `|@|' indicate how far \TeX\ has gotten in the
paragraph. Thus, for example, we find `|@@2: line 2.0 t=31369 -> @@1|'
after `|...hap-pi-est when|' and before `|he|', so we know that feasible
breakpoint@|@@2| occurs at the space between the words |when| and |he|.
The notation `|line 2.0|' means that this feasible break comes at the end
of line@2, and that this line will be very loose. \ (The suffixes
|.0|, |.1|, |.2|, |.3| stand respectively for very loose, loose, decent,
and tight.) \ A hyphen is suffixed to the line number if that line
ends with a discretionary break, or if it is the final line of the
paragraph; for example, `|line 1.2-|' is a decent line that was hyphenated.
The notation `|t=31369|' means that the total demerits from the beginning
of the paragraph to@|@@2| are 31369, and `|-> @@1|' means that the best
way to get to |@@2| is to come from |@@1|. On the preceding line of trace
data we see the calculations for a typeset line to this point from |@@1|:
the badness is@127, the penalty is@0, hence there are 28769 demerits.
Similarly, breakpoint |@@3| presents an alternative for the second line of
the paragraph, obtained by breaking between `|he|' and `|was|'; this one
makes the second line tight, and it has only 3896 demerits when the
demerits of line@1 are added, so it appears that |@@3| will work much
better than |@@2|. However, the next feasible breakpoint (|@@4|) occurs
after `|doc-|', and the line from |@@2| to@|@@4| has only 12621 demerits,
while the line from |@@3| to@|@@4| has a whopping 101901; therefore
the best way to get from |@@0| to@|@@4| is via@|@@2|. If we regard
demerits as distances, \TeX\ is finding the ``↑{shortest paths}'' from
|@@0| to each feasible breakpoint (using a variant of a well-known
algorithm for shortest paths in an acyclic graph). Finally the end of
the paragraph comes at breakpoint |@@6|, and the shortest path from
|@@0| to@|@@6| represents the best sequence of breakpoints. Following
the arrows backwards from@|@@6|, we see that the best breaks in this
particular paragraph go through
|@@5|, |@@3|, and@|@@1|.
\ddangerexercise Explain why there are 28769 demerits from |@@1| to |@@2|,
and 12621 demerits from@|@@2| to@|@@4|.
\answer $(10+127)↑2+0↑2+10000=28769$ and $(10+1)↑2+50↑2+10000=12621$.
In both cases the ↑{*adjdemerits} were added because the lines were
visually incompatible (decent, then very loose, then decent); and plain
\TeX's values for ↑{*linepenalty} and |\adjdemerits| were used.
\ddanger If `|b=*|' ↑(.*) appears in such trace data, it means that an
infeasible breakpoint had to be chosen because there was no feasible
alternative.
\danger We still haven't discussed the special trick that allows the
final line of a paragraph to be shorter than the others. Just before
\TeX\ begins to choose breakpoints, it does two important things: \
(1)@If the final item of the current horizontal list is glue,
↑(*unskip)
that glue is discarded. \ (The reason is that a blank space
often gets into a token list just before ↑{*par} or just before |$$|,
and this blank space should not be part of the paragraph.) \ (2)@Three more
items are put at the end of the current horizontal list: |\penalty10000|
(which prohibits a line break); |\hskip\the\parfillskip| (which adds
``↑{finishing glue}'' to the paragraph); and |\penalty-10000| (which
forces the final break). Plain \TeX\ sets ↑{*parfillskip}|=0pt plus1fil|,
so that the last line of each paragraph will be filled with white space
if necessary; but other settings of\/ |\parfillskip| are appropriate in
special applications. For example, the present paragraph ends flush with
the right margin, because it was typeset with |\parfillskip=0pt|;
the author didn't have to rewrite any of the text in order to make this
possible, since a long paragraph generally allows so much flexibility that
a line break can be forced at almost any point. You can have some fun
playing with paragraphs, because the algorithm for line breaking
occasionally appears to be clairvoyant. Just write paragraphs
that are long enough.\parfillskip=0pt
% the \danger macro makes this \parfillskip local!
\dangerexercise Ben ↑{User} decided to say `|\hfilneg\par|' at the end of
a paragraph, intending that the negative stretchability of\/ ↑{*hfilneg}
would cancel with the |\parfillskip| of plain \TeX. Why didn't his
bright idea work? ↑(paragraph, ending)
\answer Because \TeX\ discards a glue item that occurs just before
|\par|. Ben should have said, e.g., `|\hfilneg\ \par|'.
\dangerexercise How can you set |\parfillskip| so that the last line
of a paragraph has exactly as much white space at the right as the
first line has indentation at the left?
\answer Say |\parfillskip=\the\parindent|. Of course, \TeX\ will not be
able to find appropriate line breaks unless each paragraph is sufficiently
long or sufficiently lucky; but with an appropriate text, your output
will be immaculately symmetrical.{\parfillskip=\the\parindent\par}
\ddangerexercise Since \TeX\ reads an entire paragraph before it makes
any decisions about line breaks, the computer's memory might fill up
↑(capacity exceeded) if you are typesetting the works of some
↑{philosopher} who writes 200-line paragraphs. Suggest a way to
cope with such authors.
\answer Assuming that the author is deceased and/or set in his or her
ways, the remedy is to insert `|{\parfillskip=0pt\par\parskip=0pt\noindent}|'
in random places, after each 50 lines or so of text. \ (Every space
between words is usually a feasible breakpoint, when you get sufficiently
far from the beginning of a paragraph.)
\danger \TeX\ has two parameters called |\leftskip| and |\rightskip| that
specify glue to be inserted at the left and right of every line in a
paragraph; this glue is taken into account when badnesses and demerits are
computed. Plain \TeX\ normally keeps |\leftskip| and |\rightskip| zero,
but it has a `↑{:narrower}' macro that increases both of their values by
$20\pt$. You may want to use |\narrower| when ↑{quoting} lengthy passages
from a book.
\begintt
{\narrower\smallskip\noindent
This paragraph will have narrower lines than
the surrounding paragraphs do, because it
uses the ``narrower'' feature of plain \TeX.
The former margins will be restored after
this group ends.\smallskip}
\endtt
(Try it.) \ The second `↑{:smallskip}' in this example ends the paragraph.
It's important to end the paragraph before ending the group, for otherwise
the effect of\/ |\narrower| would disappear before \TeX\ began to choose
line breaks.
\dangerexercise When an entire paragraph is typeset in ↑{italic} or ↑{slanted}
type, it sometimes appears to be offset on the page with respect to
other paragraphs. Explain how you could use |\leftskip| and |\rightskip|
to shift all lines of a paragraph left by $1\pt$.
\answer |{\leftskip=-1pt \rightskip=1pt| \<text> |\par}|\par
\noindent(This applies to a full paragraph; if you want only to correct
isolated lines, you have to do it by hand.)
\dangerexercise The ↑{:centerline}, ↑{:leftline}, ↑{:rightline}, and ↑{:line}
macros of plain \TeX\ don't take |\leftskip| and |\rightskip| into
account. How could you make them do so?
\answer |\def\line#1{\hbox to\the\hsize{\hskip\the\leftskip|\parbreak
| #1\hskip\the\rightskip}}|\par
\nobreak\smallskip\noindent is the only change needed. \ (Incidentally,
↑{displayed equations} don't take account of\/ |\leftskip| and |\rightskip|
either; it's more difficult to change that, because so many variations
are possible.)
\ddanger If you suspect that ↑{:raggedright} setting is accomplished by
some appropriate manipulation of\/ |\rightskip|, you are correct. But some
care is necessary. For example, a person can
set |\rightskip=0pt plus1fil|, and every
line will be filled with space at the right. But this isn't a particularly
good way to do ragged right setting, because the infinite stretchability
will assign zero badness to lines that are very short.
To do a decent job of ragged right setting, the trick is to set
|\rightskip| so that it will stretch enough to make line breaks possible,
yet not too much, because short lines should be considered bad. Furthermore
the spaces between words should be fixed so that they do not stretch
or shrink. \ (See the definition of\/ |\raggedright| in Appendix@B\null.) \
It would also be possible to allow a little variability in the interword
glue, so that the right margin would not be quite so ragged but the
paragraphs would still have an informal appearance.
\danger \TeX\ looks at the parameters that affect
line breaking only when it is breaking lines. For example, you shouldn't
try to change the ↑{*hyphenpenalty} in the middle of a paragraph,
if you want \TeX\ to penalize the hyphens in one word more than it does in
another word. The relevant values of
|\hyphenpenalty|, |\rightskip|, |\hsize|, and so on,
are the ones that are current at the end of the paragraph.
On the other hand, the width of indentation that you get
implicitly at the beginning of a paragraph or when you say `↑{*indent}'
is determined by the value of\/ ↑{*parindent} at the time the
indentation is contributed to the current horizontal list,
not by its value at the end of the paragraph. Similarly, penalties
that are inserted into math formulas within a paragraph are based
on the values of\/ ↑{*binoppenalty} and ↑{*relpenalty} that are current
at the end of each particular formula. Appendix@D contains an example
that shows how to have both ragged right and ragged left margins within
a single paragraph, without using |\leftskip| or |\rightskip|.
\varunit 1.078pt % getting ready to make circular insert
\setbox0=\vtop{\null
\baselineskip6vu
\parfillskip0pt
\parshape 19
-18.25vu 36.50vu
-30.74vu 61.48vu
-38.54vu 77.07vu
-44.19vu 88.39vu
-48.47vu 96.93vu
-51.70vu 103.40vu
-54.08vu 108.17vu
-55.72vu 111.45vu
-56.68vu 113.37vu
-57.00vu 114.00vu
-56.68vu 113.37vu
-55.72vu 111.45vu
-54.08vu 108.17vu
-51.70vu 103.40vu
-48.47vu 96.93vu
-44.19vu 88.39vu
-38.54vu 77.07vu
-30.74vu 61.48vu
-18.25vu 36.50vu
\fiverm
\frenchspace
\noindent
\hbadness 6000
\tolerance 9999
\pretolerance 0
\hyphenation{iso-peri-met-ric}
The area of a circle is a mean proportional
between any two regular and similar polygons of which one
circumscribes it and the other is isoperimetric with it.
In addition, the area of the circle is less than that of any
circumscribed polygon and greater than that of any
isoperimetric polygon. And further, of these
circumscribed polygons, the one that has the greater number of sides
has a smaller area than the one that has a lesser number;
but, on the other hand, the isoperimetric polygon that
has the greater number of sides is the larger.
\hbox to 36.50vu{\hss[Galileo,\thinspace1638]\hss}
}
\danger
\parshape 16
3pc 26pc
3pc 26pc
0pc 24.69pc
0pc 23.51pc
0pc 22.73pc
0pc 22.20pc
0pc 21.85pc
0pc 21.65pc
0pc 21.58pc
0pc 21.65pc
0pc 21.85pc
0pc 22.20pc
0pc 22.73pc
0pc 23.51pc
0pc 24.69pc
0pc 29pc
\vadjust{\moveright 28pc\vbox to 0pt{\vskip88pt\vskip-60vu\vskip-3pt\box0\vss}}
\strut It's possible to control the length of lines in a much more general
way, if simple changes to |\leftskip| and |\rightskip| aren't
flexible enough for your purposes. For example, a semicircular
↑{hole} has been cut out of the present paragraph, in order to make
room for a circular illustration that contains some of ↑{Galileo}'s
immortal words about ↑{circle}s; all of the line breaks in this paragraph
and in the circular quotation were found by \TeX's line-breaking
algorithm. You can specify an essentially arbitrary paragraph
shape by saying ↑{*parshape}|=|\<number>, where the \<number> is
a positive integer $n$, followed by $2n$ \<dimen> specifications.
In general, `|\parshape=|$n$ $i_1$@$l_1$ $i_2$@$l_2$ $\ldots$ $i_n$@$l_n$'
specifies a paragraph whose first $n$ lines will have lengths
$l_1$, $l_2$, \dots,@$l_n$, respectively, and they will be
indented from the left margin by the respective amounts
$i_1$, $i_2$, \dots,@$i_n$. If the paragraph has fewer than
$n$ lines, the additional specifications will be ignored;
if it has more than $n$ lines, the specifications for line $n$ will
be repeated ad infinitum. You can cancel the effect of a previously
specified |\parshape| by saying `|\parshape=0|'.\parfillskip0pt
↑(illustrations, fitting copy around)
\ddangerexercise Typeset the following ↑{Pascal}ian quotation in
the shape of an isosceles ↑{triangle}:
``I turn, in the following treatises, to various uses of those
triangles whose generator is unity. But I leave out many more
than I include; it is extraordinary how fertile in properties
this triangle is. Everyone can try his hand.''
\answer The author's best solution is based on an unspecified ↑{*varunit}:
\begintt
\setbox1=\hbox{I}
\setbox0=\vbox{\parshape=11 -0vu0vu -1vu2vu -2vu4vu -3vu6vu
-4vu8vu -5vu10vu -6vu12vu -7vu14vu -8vu16vu -9vu18vu -10vu20vu
\ifdim 1vu>2em \rightskip=-1wd1
\else \frenchspace \rightskip=-1wd1 plus1pt minus1pt
\leftskip=0pt plus 1pt minus1pt \fi
\parfillskip=0pt \tolerance=1000 \noindent I turn, ... hand.}
\centerline{\hbox to 1wd1{\box0\hss}}
\endtt
Satisfactory results are obtained with font cmr10 when |\varunit| is set to
$8.9\pt$, $13.4\pt$, $18.1\pt$, $22.6\pt$, $32.6\pt$, and $47.2\pt$,
yielding triangles that are respectively 11,@9, 8, 7, 6, and@5 lines tall.
\danger You probably won't need unusual parshapes very often. But there's
a special case that occurs rather frequently, so \TeX\ provides a special
abbreviation for it in terms of two parameters called ↑{*hangindent} and
↑{*hangafter}. The command `|\hangindent=|\<dimen>' specifies a so-called
↑{hanging indentation}, and the command
`|\hangafter=|\<number>' specifies the duration
of that indentation. Let $x$ and $n$ be the values of\/ |\hangindent| and
|\hangafter|, and let $h$ be the value of\/ ↑{*hsize}; then if $n\ge0$,
hanging indentation will occur on lines $n+1$, $n+2$, $\ldots$ of the
paragraph, but if $n<0$ it will occur on lines 1,@2,
\dots,@$\vert n\vert$. Hanging indentation means that lines will
be of width $h-\vert x\vert$ instead of their normal width@$h$;
if $x\ge0$, the lines will be indented at the left margin, otherwise
they will be indented at the right margin. For example, the
``dangerous bend'' paragraphs of this manual have a hanging indentation
of 3@picas that lasts for two lines; they were set with |\hangindent=3pc|
and |\hangafter=-2|.
\danger Plain \TeX\ uses hanging indentation in its `↑{:item}' macro, which
produces a paragraph in which every line has the same indentation as a
normal |\indent|. Furthermore, |\item| takes a parameter that is placed
into the position of the indentation on the first line. Another macro called
`↑{:itemitem}' does the same thing but with double indentation.
For example, suppose you type
\begintt
\item{1.} This is the first of several cases that are being
enumerated, with hanging indentation applied to entire paragraphs.
\itemitem{a)} This is the first subcase.
\itemitem{b)} And this is the second subcase. Notice
that subcases have twice as much hanging indentation.
\item{2.} The second case is similar.
\endtt
{\let\par=\endgraf Then you get the following output:
\medskip
\item{1.} This is the first of several cases that are being
enumerated, with hanging indentation applied to entire paragraphs.
\itemitem{a)} This is the first subcase.
\itemitem{b)} And this is the second subcase. Notice
that subcases have twice as much hanging indentation.
\item{2.} The second case is similar.
\medskip}\noindent\hangindent0pt
(Indentations in plain \TeX\ are not actually as dramatic as those
displayed here; Appendix@B
says `|\parindent=20pt|', but this manual has been set with
|\parindent=36pt|.) \ It is customary to put ↑{:medskip} before and after
a group of itemized paragraphs, and to say |\noindent|
before any closing remarks that apply to all of the cases.
↑(enumerated cases in separate paragraphs)
\dangerexercise Suppose one of the enumerated cases continues for two
or more paragraphs. How can you use |\item| to get hanging indentation
on the subsequent paragraphs?
\answer Say |\item{}| at the beginning of each paragraph that wants hanging
indentation.
\dangerexercise Explain how to make a ``↑{bullet}ed'' item that says `$\bullet$'
instead of `1.'.
\answer |\item{$\bullet$}|
\ddangerexercise The `|\item|' macro doesn't alter the right-hand margin. How
could you indent at both sides?
\answer Either change |\hsize| or |\rightskip|. The trick is to change it back
again at the end of a paragraph. Here's one way, without grouping:
\begintt
\let\endgraf=\par \edef\restorehsize{\hsize=\the\hsize}
\def\par{\endgraf \restorehsize \let\par=\endgraf}
\setdimen0=\the\hsize \advdimen0 by\minusthe\parindent
\hsize=\dimen0
\endtt
\ddangerexercise Explain how you could specify a hanging indentation
of $-2$ ems (i.e., the lines should project into the left margin),
after the first two lines of a paragraph.
\answer |\setdimen0=\the\hsize \advdimen0 by 2em|\parbreak
|\parshape=3 0pt\the\hsize 0pt\the\hsize -2em\dimen0|
\danger If\/ |\parshape| and hanging indentation have both been specified,
|\parshape| takes precedence and |\hangindent| is ignored. You get the
normal paragraph shape, in which every line width is |\hsize|, when
|\parshape=0|, |\hangindent=0pt|, and |\hangafter=1|. \TeX\ automatically
restores these normal values at the end of every paragraph, and (by
local definitions) whenever it enters internal vertical mode. For example,
hanging indentation that might be present outside of a ↑{*vbox} construction
won't occur inside that vbox, unless you ask for it inside.
↑(paragraph shape reset) ↑(hanging indentation reset)
\ddangerexercise Suppose you want to leave room at the right margin for
a rectangular illustration that takes up 15 lines, and you expect that
three paragraphs will go by before you have typeset enough text to get
past that illustration. Suggest a good way to do this without trial and error,
given the fact that \TeX\ resets hanging indentation.
\answer The three paragraphs can be combined into a single paragraph, if
you use `|\hfil\vadjust{\vskip\the\parskip}\break\indent|' instead of
`|\par|' after the first two. Then of course you say, e.g.,
|\hangindent=-50pt \hangafter=-15|. \ (The same idea can be applied in
connection with |\looseness|, if you want \TeX\ to make one of three
paragraphs looser but if you don't want to choose which one it will be.
However, long paragraphs fill \TeX's memory; please use restraint.) \
See also the next exercise.
\ddanger If ↑{displayed equations} occur in a paragraph that has a nonstandard
shape, \TeX\ always assumes that the display takes up exactly three lines.
For example, a paragraph that has four lines of text, then a display, then
two more lines of text, is considered to be $4+3+2=9$ lines long; the
displayed equation will be indented and centered using the paragraph shape
information appropriate to line@6.
\ddanger \TeX\ has an internal integer variable called ↑{*prevgraf} that
records the number of lines in the most recent paragraph that has been
completed or partially completed. You can look at this number by saying
|\the\prevgraf|, and you can set |\prevgraf| to some nonnegative value
if you want to make \TeX\ think that it is in some particular part of the
current paragraph shape. For example, let's consider again a paragraph
that contains four lines plus a display plus two more lines. When \TeX\
starts the paragraph, it sets |\prevgraf=0|; when it starts the display,
|\prevgraf| will be@4; when it finishes the display, |\prevgraf| will
be@7; and when it ends the paragraph, |\prevgraf| will be@9. If the
display is actually one line taller than usual, you could set
|\prevgraf=8| at the beginning of the two final lines; then \TeX\ will
think it has made a 10-line paragraph. The value of\/ |\prevgraf| affects
line breaking only when \TeX\ is dealing with nonstandard |\parshape|
or |\hangindent|.
\edef\lastex{\chapno.\the\count\exno}
\ddangerexercise Solve exercise \lastex\ using |\prevgraf|.
\answer Use |\hangcarryover| between paragraphs, defined as follows:
\begintt
\def\hangcarryover{\edef\next{\hangafter=\the\hangafter
\hangindent=\the\hangindent}
\par\next
\edef\next{\prevgraf=\the\prevgraf}
\indent\next}
\endtt
\ddanger You are probably convinced by now that \TeX's line-breaking algorithm
has plenty of bells and whistles, perhaps even too many.
But there's one more feature,
called ``looseness''; some day you might find yourself needing it,
when you are fine-tuning the pages of a book. If you set |\looseness=1|,
\TeX\ will try to make the current paragraph one line longer than its
optimum length, provided that there is a way to choose such breakpoints
without exceeding the tolerance you have specified for the badnesses
of individual lines. Similarly, if you set |\looseness=2|, \TeX\ will
try to make the paragraph two lines longer; and |\looseness=-1| causes an
attempt to make it shorter. The general idea is that \TeX\ first finds
breakpoints as usual; then if the optimum breakpoints produce
$n$@lines, and if the current ↑{*looseness} is@$l$, \TeX\ will choose
the final breakpoints so as to make the final number of lines as close
as possible to $n+l$ without exceeding the current tolerance. Furthermore,
the final breakpoints will have fewest total demerits, considering all ways
to achieve the same number of@lines.
\ddanger For example, you can set |\looseness=1| if you want to avoid
a lonely ``↑{club line}'' or ``↑{widow line}'' on some page that does not
have sufficiently flexible glue, or if you want the total number of
lines in some two-column document to come out to be an even number.
It's usually best to choose a paragraph that is already pretty ``full,''
i.e., one whose last line doesn't have much white space, since such
paragraphs can generally be loosened without much harm. You might
also want to insert a ↑{tie} between the last two words of that paragraph,
so that the loosened version will not end with only one ``↑{widow word}'' on the
↑(orphan, see widow word)
line; this tie will cover your tracks, so that people will find it hard to
detect the fact that you have tampered with the spacing. On the other
hand, \TeX\ can take almost any sufficiently long paragraph and stretch
it a bit, without substantial harm; the present paragraph is, in fact,
one line looser than \hbox{its optimum length}.\looseness=1
\ddanger \TeX\ resets the looseness to zero at the same time as it resets
|\hangindent|, |\hangafter|, and |\parshape|.
\ddangerexercise Explain what \TeX\ will do if you set |\looseness=-1000|.
\answer It will set the current paragraph in the minimum number of lines
that can be achieved without violating the tolerance; and, given that
number of lines, it will break them optimally. \ (However, nonzero
looseness makes \TeX\ work harder, so this is not recommended if you
don't want to pay for the extra computation. You can achieve almost the
same result much more efficiently by setting ↑{*linepenalty}|=100|, say.)
\danger Just before switching to horizontal mode to begin scanning a
paragraph, \TeX\ inserts the glue specified by ↑{*parskip} into the vertical
list that will contain the paragraph, unless that vertical list is empty so
far. For example, `|\parskip=3pt|' will cause 3@points of extra space
to be placed between paragraphs. Plain \TeX\ sets |\parskip=0pt plus1pt|;
this gives a little stretchability, but no extra space.
\danger After line breaking is complete, \TeX\ appends the lines to the
current vertical list that encloses the current paragraph, inserting
interline glue as explained in Chapter@12; this interline glue will
depend on the values of\/ ↑{*baselineskip}, ↑{*lineskip}, and ↑{*lineskiplimit}
that are currently in force. \TeX\ will also insert penalties into
the vertical list, just before each glob of ↑{interline glue}, in order to
help control page breaks that might have to be made later. For example, a
special penalty will be assessed for breaking a page between the first two
lines of a paragraph, or just before the last line, so that ``club'' or
``widow'' lines that are detached from the rest of a paragraph will not
appear all alone on a page unless the alternative is worse.
\danger Here's how interline penalties are calculated: \TeX\ has just
chosen the breakpoints for some paragraph, or for some partial paragraph
that precedes a displayed equation; and $n$@lines have been formed.
The penalty between lines $j$ and $j+1$, given a value of $j$ in the
range $1\le j<n$, is the value of\/ ↑{*interlinepenalty} plus
additional charges made in special cases: The ↑{*clubpenalty} is
added if $j=1$, i.e., just after the first line; then the
↑{*displaywidowpenalty} or the ↑{*widowpenalty} is added if $j=n-1$,
i.e., just before the last line, depending on whether or not
the current lines immediately precede a display; and finally the
↑{*brokenpenalty} is added, if the $j$th line ended at a discretionary break.
(Plain \TeX\ sets |\clubpenalty=150|,
|\widowpenalty=150|, |\displaywidowpenalty=50|, and |\brokenpenalty=100|;
the value of\/ |\interlinepenalty| is normally zero, but it is increased to 100
within footnotes, so that
long footnotes will tend not to be broken between pages.)
\dangerexercise Consider a five-line paragraph in which the second and fourth
lines end with hyphens. What penalties does plain \TeX\ put between the lines?
\answer 150, 100, 0, 250. \ (When the total penalty is zero, as between lines
3 and@4 in this case, no penalty is actually inserted.)
\dangerexercise What penalty goes between the lines of a two-line paragraph?
\answer |\interlinepenalty| plus |\clubpenalty| plus |\widowpenalty| (and
also plus |\brokenpenalty|, if the first line ends with a discretionary break).
\ddanger If you say ↑{*vadjust}|{|\<vertical list>|}| within a paragraph,
\TeX\ will insert the specified internal vertical list into the vertical
list that encloses the paragraph, immediately after whatever line
contained the position of the |\vadjust|. For example, you can say
`|\vadjust{\vskip1pt}|' to increase the amount of space between lines of a
paragraph that would otherwise come out too close together. \ (The
\vadjust{\vskip1pt}author
did it in the previous line, just to illustrate what happens.) \ Also,
if you want to make sure that a page break will occur immediately after a
certain line, you can say `|\vadjust{\eject}|' ↑(:eject) anywhere in that line.
\ddanger Later chapters discuss |\insert| and |\mark| commands that are
relevant to \TeX's page builder. If such commands appear within a
paragraph, they are removed from whatever horizontal lines contain them
and placed into the enclosing vertical list, together with other vertical
material from |\vadjust| commands that might be present. In the final
vertical list, each horizontal line of text is an hbox that is immediately
preceded by interline glue and immediately followed by vertical material
that was removed from that line (with left to right order preserved, if
there are several insertions); then comes the interline penalty, if it is
nonzero. No additional ↑{interline glue} is computed for inserted vertical
material.
\ddangerexercise Design a |\marginalstar| macro ↑(marginal notes)
that can be used anywhere in a paragraph. It should use |\vadjust| to
place an asterisk in the margin just to the left of the line where
|\marginalstar| occurs.
\answer The tricky part is to avoid ``opening up'' the paragraph by
adding anything to its height; yet this star is to be contributed after
a line having an unknown depth, because the depth of the line depends
on details of line breaking that aren't known until afterwards.
The following solution uses ↑{:strut}, assuming that |\strutdepth| is
the depth of the current strut (and of the line containing that strut).
\begintt
\def\marginalstar{\strut\vadjust{\vskip-\strutdepth
\specialstar}}
\endtt
Here |\specialstar| is a box of height zero and depth |\strutdepth|,
and it puts an asterisk in the margin:
\begintt
\def\specialstar{\vtop to \strutdepth{
\baselineskip\strutdepth
\vss\llap{* }\null}}
\endtt
\ddanger When \TeX\ enters ↑{horizontal mode}, it will interrupt its normal
scanning to read tokens that were predefined by the command
↑{*everypar}|{|\<token list>|}|. For example, suppose you have said
`|\everypar{A}|'. If you type `|B|' in vertical mode, \TeX\ will shift
to horizontal mode (after contributing ↑{*parskip} glue to the current
page), and a horizontal list will be initiated by inserting an empty box
of width ↑{*parindent}. Then \TeX\ will read `|AB|', since it reads the
|\everypar| tokens before getting back to the `|B|' that triggered the
new paragraph. Of course, this is not a very useful illustration of
|\everypar|; but if you let your imagination run you will think of
better applications.
\ddangerexercise Use |\everypar| to define an |\insertbullets| macro: All
paragraphs in a group of the form `|{\insertbullets ...}|' should have a
bullet symbol `$\bullet$' as part of their indentation.
↑(bulleted lists)
\answer |\def\insertbullets{\everypar{\llap{$\bullet$\enspace}}}|\par
\nobreak\smallskip\noindent
(A similar device can be used to insert hanging indentation,
and/or to number the paragraphs automatically.)
\ddanger A paragraph of zero lines is formed if you say `|\noindent\par|'.
If\/ |\everypar| is null, such a paragraph contributes nothing except
|\parskip| glue to the current vertical list.
\ddangerexercise Guess what happens if you say `|\noindent$$...$$ \par|'.
\answer First comes |\parskip| glue (but you might not see it on the current
page if you say |\showlists|, since glue disappears at the top of each
page). Then comes the result of\/ |\everypar|, but let's assume that
|\everypar| doesn't add anything to the horizontal list, so that
you get an empty horizontal list; then there's no partial paragraph
before the display. The displayed equation follows the normal rules
(it occupies lines 1--3 of the paragraph, and uses the indentation and
length of line@2, if there's a nonstandard shape). Nothing follows the
display, since a blank space is ignored after a closing `|$$|'.\par
Incidentally, the behavior is different if you start a paragraph with
`|$$|' instead of with |\noindent$$|, ↑(display at beginning of paragraph)
since \TeX\ inserts a paragraph indentation that will appear on a line by
itself (with |\leftskip| and |\parfillskip| and |\rightskip| glue).
\ddanger Experience has shown that \TeX's line-breaking algorithm can be
harnessed to a surprising variety of tasks. Here, for example, is an application
that indicates one of the possibilities: Articles that are published in
{\sl↑{Mathematical Reviews}\/\null} are generally signed with the reviewer's
name and address, and this information is typeset flush right, i.e., at
the right-hand margin. ↑(flush right)
If there is sufficient space to put such a name and address at the right of
the final line of the paragraph, the publishers can save space, and at the same
time the results look better because there are no strange gaps on the page.
% \signed <name> (<address>) -- working copy of the macro that typesets the
% sample reviews in the display just below.  #1 is the reviewer's name (set
% in \sl), #2 the address (set in \rm, inside parentheses).
% Apart from line layout it matches the version listed for the reader further
% on, except that the paragraph is ended with \endgraf instead of \par.
% The line ends inside the body are significant (they supply space tokens),
% so no trailing comments are placed inside the definition.
\def\signed #1 (#2){{\unskip\nobreak\hfil\penalty50\hskip2em
\hbox{}\nobreak\hfil\sl#1\/ \rm(#2)
\parfillskip=0pt \finalhyphendemerits=0 \endgraf}}
$$\displayvbox{\hsize 3.0in \parindent0pt
This is a case where the name and address fit in nicely with the review.
\signed A. Reviewer (Ann Arbor, Mich.)
\medskip
But sometimes an extra line must be added. \signed N. Bourbaki (Paris)}$$
↑(Reviewer) ↑(Bourbaki)
Let's suppose that a space of at least two ems should separate the reviewer's
name from the text of the review, if they occur on the same line. We would
like to design a macro so that the examples shown above could be typed
as follows in an input file:
\begintt
... with the review. \signed A. Reviewer (Ann Arbor, Mich.)
... an extra line must be added. \signed N. Bourbaki (Paris)
\endtt
Here is one way to solve the problem:
\begintt
\def\signed #1 (#2){{\unskip\nobreak\hfil\penalty50
\hskip2em\hbox{}\nobreak\hfil\sl#1\/ \rm(#2)
\parfillskip=0pt \finalhyphendemerits=0 \par}}
\endtt
If a line break occurs at the |\penalty50|, the |\hskip2em| will disappear
and the empty |\hbox| will occur at the beginning of a line, followed by
|\hfil| glue. This yields two lines whose badness is zero; the first of these
lines is assessed a penalty of@50. But if no line break occurs at the
|\penalty50|, there will be glue of $2\,{\rm em}$ plus $2\,{\rm fil}$ between
the review and the name; this yields one line of badness zero. \TeX\ will
try both alternatives, to see which leads to the fewest total demerits.
The one-line solution will usually be preferred if it is feasible.
\ddangerexercise Explain what would happen if `|\hbox{}|' were left out
of the |\signed| macro.
\answer A break at |\penalty50| would cancel |\hskip2em\nobreak\hfil|,
so the next line would be forced to start with the reviewer's name flush left.
\ddangerexercise Why does the |\signed| macro say
`|\finalhyphendemerits=0|'\thinspace?
\answer Otherwise the line-breaking algorithm might prefer two final lines to
one final line, simply in order to move a hyphen from the second-last line up
to the third-last line where it doesn't cause demerits. This in fact caused
some surprises when the |\signed| macro was being tested; |\tracingparagraphs=1|
was used to diagnose the problem.
{\hbadness=10000
\ddangerexercise In one of the paragraphs earlier in this chapter, the author
used ↑{:break} to force a line break in a specific place; as a result, the
third line of that particular paragraph was really spaced out.\break
Explain why all the extra space went into the third line, instead of being
distributed impartially among the first three lines.
\answer Distributing the extra space evenly would lead to three lines of
the maximum badness (10000). It's better to have just one bad line
instead of three, since \TeX\ doesn't distinguish degrees of badness when
lines are really awful. In this particular case the ↑{*tolerance} was 200,
so \TeX\ didn't try any line breaks that would stretch the first two lines;
but even if the tolerance had been raised to 10000, the optimum setting would
have had only one underfull line. If you really want to spread the
space evenly you can do so by using ↑{*spaceskip} to increase the
amount of stretchability between words.
}
\ddangerexercise Devise a ↑{:raggedcenter} macro (analogous to ↑{:raggedright})
that partitions the words of a paragraph into as few as possible lines
of approximately equal size and centers each individual line. Hyphenation
should be avoided if possible.
\answer |\def\raggedcenter{\leftskip=0pt plus4em \rightskip=\the\leftskip|%
\parbreak|\parfillskip=0pt \spaceskip=.3333em \xspaceskip=.5em|\parbreak
|\pretolerance=9999 \tolerance=9999|\parbreak
|\hyphenpenalty=9999 \exhyphenpenalty=9999 }|
\endchapter
When the author objects to [a hyphenation]\/
he should be asked to add or cancel or substitute
a word or words that will prevent the breakage.
\smallskip
Authors who insist on even spacing always,
with sightly divisions always,
do not clearly understand the rigidity of types.
\author T. L. ↑{DE VINNE}, {\sl Correct Composition\/} (1901) % p138, p206
\bigskip
In reprinting his own works, whenever [William ↑{Morris}]\/
found a line that justified awkwardly, he altered the wording
solely for the sake of making it look well in print.
\smallskip
When a proof has been sent me with two or three
lines so widely spaced as to make a grey band across the page,
I have often rewritten the passage so as to fill up the lines better;
but I am sorry to say that my object has generally been so little
understood that the compositor has spoilt all the rest
of the paragraph instead of mending his former bad work.
\author GEORGE BERNARD ↑{SHAW}, in {\sl The Dolphin\/} (1940) % v4 p80
\eject
\beginchapter Chapter 15. How \TeX\ Makes\\Lines Into Pages
\tracingpages=1
\TeX\ attempts to choose desirable places to divide your document into
individual pages, and its technique for doing this usually works pretty
well. But the problem of ↑{page make-up} is considerably more difficult
than the problem of line breaking that we considered in the previous chapter,
because pages often have much less flexibility than lines do. If the
vertical glue on a page has little or no ability to stretch or to shrink,
\TeX\ usually has no choice about where to start a new page; conversely, if
there is too much variability in the glue, the result will look bad because
different pages will be too irregular. Therefore if you are fussy about the
appearance of pages, you can expect to do some rewriting of the manuscript
until you achieve an appropriate balance, or you might need to fiddle
with the ↑{*looseness} as described in Chapter@14; no automated system will
be able to do this as well as you.
Mathematical papers that contain a lot of displayed equations have an
advantage in this regard, because the glue that surrounds a display tends to
be quite flexible. \TeX\ also gets valuable room to maneuver when you
have occasion to use ↑{:smallskip} or ↑{:medskip} or ↑{:bigskip} spacing
between certain paragraphs. For example, consider a page that contains
a dozen or so exercises, and suppose that there is $3\pt$ of additional
space between exercises, where this space can stretch to $4\pt$ or
shrink to $2\pt$. Then there is a chance to squeeze an extra line on the page,
or to open up the page by removing one line, in order to avoid splitting
an exercise between pages. Similarly, it is possible to use flexible
glue in special publications like membership rosters or company telephone
directories, so that individual entries need not be split between columns
or pages, yet every column appears to be the same height.
For ordinary purposes you will probably find that \TeX's automatic method
of page breaking is satisfactory. And when it occasionally gives
unpleasant results, you can force the machine to break at your favorite
place by typing `↑{:eject}'. But be careful: |\eject| will cause \TeX\ to
stretch the page out, if necessary, so that the top and bottom baselines
agree with those on other pages. If you want to eject a short page,
filling it with blank space at the bottom, type `|\vfill\eject|' instead.
\danger If you say `|\eject|' in the middle of a paragraph, the paragraph
will end first, as if you typed `|\par\eject|'. But Chapter@14 mentions
that you can say `↑{*vadjust}|{\eject}|' in mid-paragraph, if you want to
force a page break after whatever line contains your current position
when the full paragraph is eventually broken up into lines; the rest of the
paragraph will go on the following page.
\danger To prevent a page break, you can say `↑{:nobreak}' in vertical
mode, just as |\nobreak| in horizontal mode prevents breaks between lines.
For example, it is wise to say |\nobreak| between the title of a subsection
and the first line of text in that subsection. But |\nobreak| does not
cancel the effect of other commands like |\eject| that tell \TeX\ to
break; it only inhibits a break at glue that immediately follows. You
should become familiar with \TeX's rules for line breaks and page breaks
if you want to maintain fine control over everything. The remainder of
this chapter is devoted to the intimate details of page breaking.
\ninepoint
\danger \TeX\ breaks lists of lines into pages by computing badness ratings
and penalties, more or less as it does when breaking paragraphs into lines.
But pages are made up one at a time and removed from \TeX's memory; there is
no looking ahead to see how one page break will affect the next one.
In other words, \TeX\ uses a special method to find the optimum
breakpoints for the lines in an entire paragraph, but it doesn't attempt
to find the optimum breakpoints for the pages in an entire document. The
computer doesn't have enough high-speed memory capacity to remember the
contents of several pages, so \TeX\ simply chooses each page break as best
it can, by a process of ``local'' rather than ``global'' optimization.
\tracingpages=0
\danger Let's look now at the details of \TeX's page-making process.
Everything you contribute to the pages of your document is placed on
the {\sl ↑{main vertical list}}, which is the sequence of items that
\TeX\ has accumulated while in vertical mode. Each item in a ↑{vertical
list} is one of the following types of things:\enddanger
\smallskip
\item\bull a box (an hbox or vbox or rule);
\item\bull a ``↑{whatsit}'' (something special to be explained later);
\item\bull a ↑{mark} (another thing that will be explained later);
\item\bull an ↑{insertion} (yet another thing that we will get to);
\item\bull a glob of ↑{glue} (or ↑{*leaders}, as we will see later);
\item\bull a ↑{kern} (something like glue that doesn't stretch or shrink);
\item\bull a ↑{penalty} (representing the undesirability of breaking here).
\smallskip\noindent
The last three types (glue, kern, and penalty items) are called ↑{discardable},
for the same reason that we called them discardable in horizontal lists.
You might want to compare these specifications with the analogous rules
for the horizontal case, found in Chapter@14; it turns out that vertical
lists are just like horizontal ones except that character boxes, discretionary
breaks, ↑{*vadjust} items, and math shifts cannot appear in vertical lists.
Chapter@12 exhibits a typical vertical list in \TeX's internal box-and-glue
representation.
\danger Page breaks can occur only at certain places within a vertical list.
The permissible breakpoints are exactly the same as in the horizontal case,
namely\enddanger
\smallskip
\item{a)} at glue, provided that this glue is immediately preceded by a
non-discardable item (i.e., by a box, whatsit, mark, or insertion).
\smallskip
\item{b)} at a kern, provided that this kern is immediately followed by glue.
\smallskip
\item{c)} at a penalty (which might have been inserted automatically in a
paragraph).
\smallskip\noindent
Interline glue is usually inserted automatically between the boxes of a
vertical list, as explained in Chapter@12, so there is usually a valid
breakpoint between boxes.
\danger As in horizontal lists, each potential breakpoint has an
associated penalty, which is high for undesirable breakpoints and negative
for desirable ones. The penalty is zero at glue and kern breaks, so it is
nonzero only at explicit penalty breaks. If you say `↑{*penalty}|-100|'
between two paragraphs, you are indicating that \TeX\ should try to break
here because the penalty is negative; a bonus of 100 points for breaking
at this place will essentially cancel up to 100 units of badness that
might be necessary to achieve such a break. A penalty of 10000 or more is
so large that it inhibits breaking; a penalty of $-10000$ or less is so
small that it forces breaking.
\danger Plain \TeX\ provides several control sequences that help to
control page breaks. For example, ↑{:smallbreak}, ↑{:medbreak}, and
↑{:bigbreak} specify increasingly desirable places to break, having
respective penalties of $-50$, $-100$, and@$-200$; furthermore, they will
insert a ↑{:smallskip}, ↑{:medskip}, or ↑{:bigskip} of space,
respectively, if a break is not taken. However, |\smallbreak|,
|\medbreak|, and |\bigbreak| do not increase existing glue unnecessarily;
for example, if you say |\smallbreak| just after a displayed
equation, you won't get a |\smallskip| of space in addition to the glue that
already follows a display. Therefore these commands can conveniently
be used before and after the statements of ↑{theorems}, in a format for
mathematical papers. In the present manual the author has used a macro
that puts |\medbreak| before and after every dangerous-bend paragraph;
|\medbreak\medbreak| is equivalent to a single |\medbreak|, so you don't
see two medskips when one such paragraph ends and another one@begins.
\danger The ↑{:goodbreak} macro is an abbreviation for `|\par\penalty-500|'.
This is a good thing to insert in your manuscript when proofreading, if
you are willing to stretch some page a little bit extra in order to improve the
following one. Later on if you make another change so that this |\goodbreak|
command does not appear near the bottom of a page, it will have no effect;
thus it is not as drastic as |\eject|.
\danger The most interesting macro that plain \TeX\ provides for page make-up
is called ↑{:filbreak}. It means, roughly, ``Break the page here and fill the
bottom with blank space, unless there is room for more
copy that is itself followed by |\filbreak|.'' Thus if you put |\filbreak|
at the end of every paragraph, and if your paragraphs aren't too long,
every page break will occur between paragraphs, and \TeX\ will fit as many
paragraphs as possible on each page. The precise meaning of\/ |\filbreak|@is
\begintt
\vfil\penalty-200\vfilneg
\endtt
according to Appendix@B\null; and this simple combination of \TeX's primitives
produces the desired result: If a break is taken at the |\penalty-200|,
the preceding |\vfil| will fill the bottom of the page with blank space,
and the ↑{*vfilneg} will be discarded after the break; but if no break
is taken at the penalty, the |\vfil| and |\vfilneg| will cancel each other
and have no effect.
\danger Plain \TeX\ also provides a ↑{:raggedbottom} command, which is a
vertical analog of\/ ↑{:raggedright}: It tells \TeX\ to permit a small
amount of variability in the bottom margins on different pages, in order
to make the other spacing uniform.
\ddanger We saw in Chapter@14 that breakpoints for paragraphs are chosen
by computing ``demerits'' for each line and summing them over all lines.
The situation for pages is simpler because each page is considered separately.
\TeX\ figures the ``↑{cost}'' of a page break by using the following formula:
$$\display{c=\cases{
p,&if $b<\infty$ and $p\le-10000$ and $q<10000$;\cr
b+p+q,&if $b<10000$ and $-10000<p<10000$ and $q<10000$;\cr
100000,&if $b=10000$ and $-10000<p<10000$ and $q<10000$;\cr
\infty,&if ($b=\infty$ or $q\ge10000$) and $p<10000$.\cr}}$$
Here $b$ is the ↑{badness} of the page that would be formed if a break were
chosen here; $p$@is the penalty associated with the current breakpoint;
and $q$@is the sum of all penalties for split insertions on the page,
as explained below. Vertical badness is computed by the same rules as
horizontal badness; it is an integer between 0 and 10000, inclusive,
except when the box is overfull, when it is@$\infty$ (infinity).
↑(infinite badness)
\ddanger When a page is completed, it is removed from the main vertical
list and passed to an ``↑{output routine},'' as we will see below; so its
boxes and glue eventually disappear from \TeX's memory. The remainder of
the main vertical list exists in two parts: First comes the ``↑{current
page},'' which contains all the material that \TeX\ has considered so far
as a candidate for the next page to be broken off; then there are
``↑{recent contributions},'' i.e., items that will be moved to the current
page as soon as \TeX\ finds it convenient to do so. If you say
↑{*showlists}, \TeX\ will display the contents of the current page and the
recent contributions, if any, on your log file. \ (The example in
Chapter@13 doesn't show any such lists because they were both empty in
that case.)
\ddanger Whenever \TeX\ is moving an item from the top of the ``recent
contributions'' to the bottom of the ``current page,'' it will discard a
↑{discardable item} (glue, kern, or penalty) if the current page does not
contain any boxes. This is how glue disappears at a page break. Otherwise
if a discardable item is a legitimate breakpoint, \TeX\ calculates the
cost@$c$ of breaking at this point, using the formula that we have just
discussed. If the resulting@$c$ is less than or equal to the smallest cost
seen so far on the current page, \TeX\ will remember the current
breakpoint as the best so far. And if $c=\infty$ or if $p\le-10000$, \TeX\
will seize the initiative and break the page at the best remembered
breakpoint. Any material on the current page following that best
breakpoint is moved back onto the list of recent contributions, where it
will be considered again; thus the ``current page'' typically gets more
than one page's worth of material before the breakpoint is chosen.
\ddanger This procedure may seem mysterious until you see it in action.
Fortunately, there is a convenient way to watch it;
you can set ↑{*tracingpages}|=1|, thereby instructing \TeX\ to put its
page-cost calculations into your log file. For example, here is what
appeared on the log file when the author used |\tracingpages=1| at the
beginning of the present chapter:
\begintt
%% goal height=528.0, max depth=2.2
% t=10.0 g=528.0 b=10000 p=150 c=100000#
% t=22.0 g=528.0 b=10000 p=0 c=100000#
% t=34.0 g=528.0 b=10000 p=0 c=100000#
... |hbox|lb|rm(25 similar lines are being omitted here)|rb
% t=346.0 g=528.0 b=10000 p=0 c=100000#
% t=358.0 g=528.0 b=10000 p=150 c=100000#
% t=370.02222 g=528.0 b=10000 p=-100 c=100000#
% t=398.0 g=528.0 b=10000 p=0 c=100000#
% t=409.0 g=528.0 b=10000 p=0 c=100000#
% t=420.0 g=528.0 b=10000 p=150 c=100000#
% t=431.0 g=528.0 b=10000 p=-100 c=100000#
% t=459.0 g=528.0 b=10000 p=0 c=100000#
% t=470.0 g=528.0 b=10000 p=0 c=100000#
% t=481.0 g=528.0 b=10000 p=0 c=100000#
% t=492.0 g=528.0 b=10000 p=0 c=100000#
% t=503.0 g=528.0 b=3049 p=0 c=3049#
% t=514.0 g=528.0 b=533 p=150 c=683#
% t=525.0 g=528.0 b=5 p=-100 c=-95#
% t=553.0 g=528.0 b=* p=0 c=*
\endtt
This trace output is admittedly not ``user-friendly'' in appearance, but
after all it comes from deep inside \TeX's bowels where things have been
reduced to numeric calculations. You can learn to read it with a little
practice, but you won't need to do so very often unless you need to
plunge into page-breaking for special applications. Here's what it
means: The first line, which starts with `|%%|', ↑(percent percent)
is written when the first box or insertion enters the current page list;
it shows the ``↑{goal height}'' and the ``↑{max depth}'' that will be
used for that page (namely, the current values of\/ ↑{*vsize} and
↑{*maxdepth}). In the present manual we have |\vsize=44pc| and
|\maxdepth=2.2pt|; dimensions in the log file are always displayed in
points. The subsequent lines, which start with a single `|%|', ↑(percent)
are written whenever a legal breakpoint is being moved from the list of
recent contributions to the current page list. Every |%|@line shows $t$,
which is the total natural height so far if a page break were to occur,
and $g$, which is the goal height; in this example $g$@stays fixed at
$528\pt$, but $g$ would have decreased if insertions such as footnotes had
occurred on the page. The values of@$t$ are steadily increasing from 10 to
22 to@34, etc.; baselines are $12\pt$ apart at the top of the page and
$11\pt$ apart at the bottom (where material is set in nine-point type).
We are essentially seeing one |%|@line per hbox of text being placed on
the current page. However, the |%|@lines are generated by the penalty or
glue items that follow the hboxes, not by the boxes themselves. Each
|%|@line shows also the badness@$b$, the penalty@$p$, and the cost@$c$
associated with a breakpoint; if this cost is the best so far, it is
marked with a `|#|' sign, ↑(sharp) meaning that ``this breakpoint will be
used for the current page if nothing better comes along.'' Notice that the
first 40 or so breaks all have $b=10000$, since they are so bad that \TeX\
considers them indistinguishable; in these cases $c=100000$, so \TeX\ simply
accumulates material until the page is full enough to
have $b<10000$. A penalty of 150 reflects the ↑{*clubpenalty} or the
↑{*widowpenalty} that was inserted as described in Chapter@14. The three
lines that say |p=-100| are the breakpoints between ``dangerous bend''
paragraphs; these came from ↑{:medbreak} commands. The notation |b=*| and
|c=*| on the final line means that $b$ and@$c$ are infinite; the total
height of $553\pt$ cannot be reduced to $528\pt$ by shrinking the
available glue. Therefore the page is ejected at the best previous place,
which turns out to be a pretty good break: |b=5| and |p=-100| yields a
net cost of $-95$.
\ddangerexercise Suppose the paragraph at the bottom of the example page
had been one line shorter; what page break would have been chosen?
\answer The last three page-break calculations would have been
\begintt
% t=503.0 g=528.0 b=3049 p=150 c=3199#
% t=514.0 g=528.0 b=533 p=-100 c=433#
% t=542.0 g=528.0 b=* p=0 c=*
\endtt
so the break would have occurred at the same place. The badness would have
been@533, but the page would still have looked tolerable. \ (On the other
hand if that paragraph had been two lines shorter instead of one,
the first two lines of the next ``dangerous bend'' paragraph
would have appeared on that page; the value $t=531\pt$ would have
been able to shrink to $g=528\pt$ because the three ``medskips'' on
the page would have had a total shrinkability of $6\pt$. This would certainly
have been preferable to a stretched-out page whose badness was@3049; but the
author might have seen it and written another sentence or two, so that
the paragraph would not have been broken up. After all, this manual is supposed
to be an example of good practice.)
\ddangerexercise The last two ``|%| lines'' of this example show $t$ jumping
by $28\pt$, from 525.0 to@553.0. Explain why there was such a big jump.
\answer The next legal break after the beginning of a dangerous bend
paragraph occurs $28\pt$ later, because there is $6\pt$ additional space for
a |\medskip|, followed by two lines of $11\pt$ each. \TeX\ does not
allow breaking between those two lines; the ↑{*clubpenalty} is set briefly
to 10000 in Appendix@E\null, since the dangerous bend symbol is two lines tall.
\ddanger The ↑{*maxdepth} parameter tells \TeX\ to raise the bottom box
on the page if that box has too much depth, so that the depth of the
constructed page will not exceed a specified value. \ (See the discussion of
↑{*boxmaxdepth} in Chapter@12.) \ In our example |\maxdepth=2.2pt|, and
the influence of this parameter can be seen in the line that says
`|% t=370.02222|'. Ordinarily $t$@would have been 370.0 at that breakpoint;
but the hbox preceding it was unusual because it contained the letter
|j| in ↑{:tt}, and a 10-point typewriter-style |j| descends $2.22222\pt$
below the baseline. Therefore \TeX\ figured badness as if the hbox were
$.02222\pt$ higher and only $2.2\pt$ deep.
\ddanger Notice that the first ``|%|@line'' of our example says |t=10.0|;
this is a consequence of another parameter, called ↑{*topskip}. Glue
disappears at a page break, but it is desirable to produce pages whose top
and bottom baselines occur in predetermined positions, whenever possible;
therefore \TeX\ inserts special glue just before the first box on each
page. This special glue is equal to |\topskip|, except that the natural
space has been decreased by the height of the first box, or it has been
set to zero in lieu of a negative value. For example, if\/ |\topskip=20pt
plus2pt|, and if the first box on the current page is $13\pt$ tall, \TeX\
inserts `|\vskip7pt plus2pt|' just above that box. Furthermore, if the
first box is more than $20\pt$ tall, `|\vskip0pt plus2pt|' is
inserted. But this example is atypical, since the |\topskip|
glue usually has no stretchability or shrinkability; plain \TeX\ sets
|\topskip=10pt|.
\ddangerexercise Assume that |\vsize=528pt|, |\maxdepth=2.2pt|,
|\topskip=10pt|, and that no |\insert| commands are being used. \TeX\ will
make pages that are $528\pt$ high, and the following two statements will
normally be true: (a)@The baseline of the topmost box on the page will be
$10\pt$ from the top, i.e., $518\pt$ above the baseline of the page itself.
(b)@The baseline of the bottommost box on the page will coincide with the
baseline of the page itself. Explain under what circumstances (a) and@(b)
will fail.
\answer A page always contains at least one box, if there are no
insertions, since the legal breakpoints are discarded otherwise.
Statement@(a) fails if the height of the topmost box exceeds $10\pt$.
Statement@(b) fails if the depth of the bottommost box exceeds $2.2\pt$, or
if some glue or kern comes between the bottommost box and the page break
(unless that glue or kern exactly cancels the depth of the box).
\ddanger Since |\vsize|, |\maxdepth|, and |\topskip| are
parameters, you can change them at any time; what happens if you do?
Well, \TeX\ salts away the values of\/ |\vsize| and |\maxdepth| when
it prints the ``|%%|@line,'' i.e., when the first box or insertion
occurs on the current page; subsequent changes to those two parameters
have no effect until the next current page is started. On the other hand,
\TeX\ looks at |\topskip| only when the first box is being contributed
to the current page. If insertions occur before the first box, the
|\topskip| glue before that box is considered to be a valid breakpoint;
this is the only case in which a completed page might not contain a box.
\ddanger You can look at the $t$ and $g$ values that are used in page breaking
by saying `|\the|↑{*pagetotal}' and `|\the|↑{*pagegoal}', respectively.
You can even change them (but let's hope that you know what you are doing).
For example, the command |\pagegoal=500pt| overrides the previously saved
value of\/ |\vsize|. When the current page contains no boxes,
|\the\pagetotal| is zero and |\the\pagegoal| is $16383.99998\pt$ (\TeX's
largest \<dimen>); changing their values has no effect at such times.
Besides |\pagetotal|, which represents the accumulated natural height,
\TeX\ maintains the quantities ↑{*pagestretch}, ↑{*pagefilstretch},
↑{*pagefillstretch}, ↑{*pagefilllstretch}, ↑{*pageshrink}, and
↑{*pagedepth}. The integer $q$ in the formula for page costs is
also available for inspection and change; it is called ↑{*insertpenalties}.
\ddanger Page breaking differs from line breaking in one respect that
deserves mention here: If you say |\eject\eject|, the second |\eject| is
ignored, because it is equivalent to |\penalty-10000| and penalties are
discarded after a page break. But if you say |\break\break| in a
paragraph, the second ↑{:break} causes an empty line, because penalties
are discarded after a break in a paragraph only if they do not belong to
the final sequence of breakpoints. This technicality is unimportant in
practice, because |\break\break| isn't a good way to make an empty line;
that line will be an underfull hbox, since it has no glue in it.
Similarly, `|\eject\eject|' would not be a good way to make an empty page,
even if \TeX\ were to change its rules so that an |\eject| would never be
ignored. The best way to eject an ↑{empty page} is to say
`|\eject\line{}\vfil\eject|', and the best way to create an ↑{empty line}
is `|\break\hbox{}\hfil\break|'. Both of these avoid underfull boxes.
\danger You are probably wondering how page numbers and such things
get attached to pages. The answer is that \TeX\ allows you to do further
processing after each page break has been chosen; a special ``output
routine'' goes into action before pages actually receive their final
form. Chapter@23 explains how to construct output routines and how to
modify the output routine of plain@\TeX.
\danger Every once in a@while, \TeX\ will produce a really awful-looking
page and you will wonder what happened. For example, you might get just
one paragraph and a lot of white space, when some of the text on the
following page would easily fit into the white space. The reason for such
apparently anomalous behavior is almost always that no good page break
is possible; even the alternative that looks better to you is quite terrible
as far as \TeX\ is concerned! \TeX\ does not distinguish between two
choices that both have 10000 units of badness or more, even though some bad
breaks do look much worse than others. The solution in such cases is to
insert |\eject| or |\vfill\eject| in some acceptable spot, or to
revise the manuscript. If this problem arises frequently, however,
you probably are using a format that sets overly strict limitations
on page format; try looking at the output of\/ |\tracingpages| and
modifying some of \TeX's parameters, until you have better luck.
\danger The remainder of this chapter is about insertions: things like
footnotes and ↑{illustrations}, and how they interact with page breaks.
Before we discuss the primitive operations by which \TeX\ deals with
insertions, we will take a look at the facilities that plain \TeX\ provides
at a higher level.
\danger Illustrations can be inserted in several ways using plain \TeX.
The simplest of these is called a ``↑{floating topinsert}''; you say
$$\displaybox{|\topinsert|\<vertical mode material>|\endinsert|}$$
↑(:topinsert)↑(:endinsert) and \TeX\ will
attempt to put the vertical mode material at the top of the current page.
If there's no room for such an insertion on one page, \TeX\ will
insert it at the top of the next page. The \<vertical mode material>
can contain embedded paragraphs that temporarily interrupt vertical
mode in the usual way; for example:
\begintt
\topinsert \vskip 2in
\hsize=3in \raggedright
\noindent{\bf Figure 3.} This is the caption to the
third illustration of my paper. I have left two inches
of space above the caption so that there will be room
to introduce special artwork. \endinsert
\endtt
The ↑{caption} in this example will be set ↑{ragged-right} in a 3-inch
column at the left of the page. Plain \TeX\ automatically adds a
``↑{bigskip}'' below each topinsert; this will separate the caption from
the text. The effects of\/ |\hsize=3in| and |\raggedright| do not extend
past the |\endinsert|, since ↑{grouping} is implied.
\dangerexercise Modify this example so that the caption is moved over next to
the right margin, instead of appearing at the left.
\answer |\topinsert\vskip2in\rightline{\vbox{\hsize|\stretch|...|\stretch
|artwork.}}\endinsert|
does the job; but ↑{*leftskip}|3.5in| would be slightly more efficient than
↑{:rightline}, since \TeX\ would not have to read the text of the caption
twice. To compute the amount of\/ |\leftskip|, one could of course say
|\setdimen0=\the\hsize \advdimen0 by-3in \leftskip=\dimen0|.
\danger Similarly, if you say `↑{:pageinsert}\stretch\<vertical mode
material>\stretch |\endinsert|', the vertical mode material will be
justified to the size of a full page (without a bigskip below it);
the result will appear on the following page.
\danger There's also `↑{:midinsert}\stretch\<vertical mode material>\stretch
|\endinsert|', which tries first to insert the material in place, wherever
you happen to be, in the middle of the current page. If there is enough
room, you get the effect of↑(:bigskip)↑(:bigbreak)
$$\displaybox{|\bigskip\vbox{|\<vertical mode material>|}\bigbreak|}$$
otherwise the |\midinsert| is effectively converted to a |\topinsert|.
There is a slight probability that |\midinsert| will not find the
best placement, because \TeX\ is sometimes processing text ahead of the
current page. You may want to say `↑{:goodbreak}' just before |\midinsert|.
\danger You should use the commands |\topinsert|, |\pageinsert|, and
|\midinsert| in vertical mode (i.e., between paragraphs), not inside of
boxes or other insertions.
\danger If you have two or more |\topinsert|, |\pageinsert|, or
|\midinsert| commands in quick succession, they may carry over to several
subsequent pages; but they will retain their relative order when they are
carried over. For example, suppose you have pages that are nine inches
tall, and suppose you have already specified 4@inches of text for some
page, say page@25. Then suppose you make seven topinserts in a row, of
respective sizes $1,2,3,9,3,2,1$ inches; the 9-inch one is actually a
|\pageinsert|. What happens? Well, the first and second will appear at
the top of page 25, followed by the 4@inches of copy you have already
typed; that copy will immediately be followed by two more inches that you
type after the seven inserts. The third topinsert will appear at the top
of page@26, followed by six more inches of text; the fourth will fill
page@27; and the remaining three will appear at the top of page@28.
\dangerexercise What would happen in the example just discussed if the final
1-inch insertion were a |\midinsert| instead of a |\topinsert|?
\answer It would appear on page@25, since it does fit there. A |\midinsert|
will jump ahead of other insertions only if it is not carried over to another
page; for example, if the second 3-inch insertion were a |\midinsert|, it would
not appear on page@26, because it is converted to a |\topinsert| as soon as the
|\midinsert| macro notices that the insertion is too big for page@25.
\danger At the end of a paper, you probably want to make sure that no
insertions are lost; and at the end of a chapter, you probably want to make
sure that no insertions float into the following chapter. Plain \TeX\ will
flush out all remaining insertions, with blank space filling the bottom of
incomplete pages, if you say `|\vfill|↑{:supereject}'.
\danger Besides illustrations that are inserted at the top of a page,
plain \TeX\ will also insert ↑{footnotes} at the bottom of a page. The
↑{:footnote} macro is provided for use within paragraphs;\footnote*{Like
this.} for example, the footnote in the present sentence was typed
in the following way:
\begintt
...paragraphs;\footnote*{Like this.} for example, ...
\endtt
\def\\{\char'134 }%
There are two parameters to a |\footnote|; first comes the ↑{reference mark},
which will appear both in the paragraph\footnote{**}{The author typed
`{\tt paragraph\\footnote\char`\{**\char`\}\char`\{The author ...\char
`\}}' here.} and in the footnote itself,
and then comes the text of the footnote.\footnote{$↑{45}$}{And
`{\tt footnote.\\footnote\char`\{\char`\$\char'17\char`\{45\char`\}\char
`\$\char`\}\char`\{And ...\char`\}}' here. The footnotes in this manual
appear in smaller type, and they are set with hanging indentation;
furthermore a smallskip occurs between footnotes on the same page. But
in plain \TeX, footnotes are typeset with the normal size of type, with
{\tt\\textindent} used for the reference mark, and without extra smallskips.
The {\tt\\textindent} macro is like {\tt\\item}, but it omits hanging
indentation.}
The latter text may be several paragraphs long, and it may contain
displayed equations and such things, but it should not involve other
insertions. \TeX\ will ensure that each footnote occurs at the bottom of the
same page as its reference.\footnote\dag{Printers often use the symbols
{\tt\\dag} (\dag), {\tt\\ddag} (\ddag), {\tt\\section} (\section), and
{\tt\\P}@(\P) as reference marks. You can say, e.g.,
`{\tt\\footnote\\dag\char`\{...\char`\}}'.} A long footnote will be
split, if necessary, and continued at the bottom of the following page, as
you can see in the ↑(:dag)↑(:ddag)↑(:section)↑(:textindent)↑(:item)
somewhat contrived example here. Authors who are interested in good
exposition should avoid footnotes whenever possible, since they tend to be
↑(:Gibbon) distracting.\footnote\ddag{Yet Gibbon's {\sl Decline and
Fall\/} would not have been the same without footnotes.}
\danger The |\footnote| macro should be used only in paragraphs that are
contributed to \TeX's main vertical list; insertions will be lost if they
occur inside of boxes. Thus, you should not try to put a |\footnote| into
a math formula, or into some table entry inside an ↑{*halign}, or even
in a ↑{:centerline}. To get around this restriction, there is a ↑{:vfootnote}
macro that can be used in vertical mode; for example:
\begintt
\centerline{A paper by R. J. Drofnats*}
\vfootnote*{Supported by NSF.}
\endtt
Notice that the reference mark `|*|' occurs twice, once in place (after
|Drofnats|) and once as the first argument to |\vfootnote|. You should put
|\vfootnote| somewhere in vertical mode on the page that contains the
other appearance of the reference mark.
\ddanger Topinserts work fine by themselves, and footnotes work fine by
themselves, but complications can arise when you try to mix them in devious
ways. For example, if a |\pageinsert| floats to the page that follows a
long footnote that had to be broken, both of the held-over insertions
may try to force themselves onto the same page, and an overfull vbox
may result. Furthermore, insertions cannot appear within insertions, so
you can't use either |\footnote| or |\vfootnote| within a |\topinsert|.
If you really need a footnote in some caption, you will have to guess where
that caption will finally fall, so that you can put a |\vfootnote| on that page.
In such complex circumstances you might want to rethink whether or not you
are really using the most appropriate format in your publication.
\danger Now let's study the primitives of \TeX\ that are used to construct
macros like |\topinsert| and |\footnote|. We are about to enter behind the
scenes into a sublanguage of \TeX\ that permits users to do complex
manipulations with boxes and glue. Our discussion will be in two parts:
First we shall consider \TeX's ``↑{registers},'' with which a user can
do ↑{arithmetic} related to typesetting; and then we shall discuss the insertion
items that can appear in horizontal and vertical lists. Our discussion of the
first topic (registers) will be marked with single dangerous-bend signs,
since registers are of general use in advanced applications of \TeX,
whether or not they relate to insertions. But the second topic will be marked
with double dangerous-bend signs, since insertions are rather esoteric.
\danger \TeX\ has 256 registers called ↑{*count}|0| to |\count255|, each capable
of containing integers between $-2147483647$ and $+2147483647$, inclusive;
↑(<number)
i.e., the magnitudes should be less than $2↑{31}$. \TeX\ also has 256 registers
called ↑{*dimen}|0| to |\dimen255|, each capable of containing a ↑{<dimen}
(see Chapter@10). There are another 256 registers called ↑{*skip}|0| to
|\skip255|, each containing ↑{<glue} (see Chapter@12); and ↑{*muskip}|0| to
|\muskip255|, each containing ↑{<muglue} (see Chapter@18). You can assign
new values to these registers by saying
$$\halign{\indent#\hfil\cr
|\setcount|\<number> |=| \<number>\cr
|\setdimen|\<number> |=| \<dimen>\cr
|\setskip|\<number> |=| \<glue>\cr
|\setmuskip|\<number> |=| \<muglue>\cr}$$
↑(*setcount)↑(*setdimen)↑(*setskip)↑(*setmuskip)
↑(*advcount)↑(*advdimen)↑(*advskip)↑(*advmuskip)
and then you can add or subtract values of the same type by saying
$$\halign{\indent#\hfil\cr
|\advcount|\<number> |by| \<number>\cr
|\advdimen|\<number> |by| \<dimen>\cr
|\advskip|\<number> |by| \<glue>\cr
|\advmuskip|\<number> |by| \<muglue>\cr}$$
For example, `|\setdimen8=\the\hsize \advdimen8 by 1in|' sets register
|\dimen8| to an inch more than the current value of the normal line size.
\danger If infinite glue components are added, lower order infinities disappear.
For example, after the two commands
\begintt
\setskip2 = 0pt plus 2fill minus 3fill
\advskip2 by 4pt plus 1fil minus 2filll
\endtt
the value of\/ |\skip2| will be $4\pt$ plus $2\,{\rm fill}$ minus
$2\,{\rm filll}$.
\danger Multiplication and division are possible too, but only by integers.
For example, `|\multdimen4 by 3|' triples the value of\/ |\dimen4|, and
`|\divskip5 by 2|' cuts in half all three components of the glue that is
currently registered in |\skip5|. There are eight multiplication and
division commands:
↑{*multcount}\<number> |by| \<number>,
↑{*multdimen}\<number> |by| \<number>,
\dots, ↑(*multskip)↑(*multmuskip)↑(*divcount)↑(*divdimen)↑(*divskip)
and ↑{*divmuskip}\<number> |by| \<number>.
You shouldn't divide by zero, nor should you multiply by numbers that
will make the results exceed the register capacities. Division of a positive
integer by a positive integer discards the remainder, and the sign of the
result changes if you change the sign of either operand. For example,
14@divided by@3 yields@4; $-14$@divided by@3 yields@$-4$;
$-14$@divided by@$-3$ yields@4. Dimension values are integer multiples
of@↑{sp} (scaled points).
\danger You can use any |\count| register in the context of a ↑{<number},
any |\dimen| register in the context of a ↑{<dimen}, any |\skip| register
in the context of ↑{<glue}, and any |\muskip| register in the context of
↑{<muglue}. For example, `|\hskip\skip1|' puts horizontal glue into a
list, based on the contents of\/ |\skip1|; and if\/ |\count5| is 20, the
command `|\advdimen20 by\dimen\count5|' is equivalent to `|\multdimen20 by 2|'.
\danger A |\dimen| register can be used also in the context of a \<number>,
and a |\skip| register can be used as a \<dimen> or a \<number>.
\TeX\ converts \<glue> to \<dimen> by omitting the stretch and shrink
components, and it converts \<dimen> to \<number> by assuming units
of@sp (scaled points). For example, if\/ |\skip1| holds the value
$1\pt$ plus@$2\pt$, then `|\setdimen1=\skip1|' sets |\dimen1| equal
to@$1\pt$; and the commands `|\setcount2=\dimen1|' or `|\setcount2=\skip1|' will
set |\count2| equal to@65536. These rules also apply to \TeX's internal
parameters, when you prefix a parameter name by `↑{*the}'. For example,
`|\setdimen2=\the\baselineskip|' will set |\dimen2| to the natural space
component of the current baselineskip glue.
\dangerexercise Test your knowledge of \TeX's registers by stating the
results of each of the following commands when they are performed in sequence:
\begintt
\setcount1=50 \setdimen2=\count1pt \divcount1 by 8
\setskip2=-10pt plus\count1fil minus\dimen2
\multskip2 by-\count1 \divskip2 by \dimen2 \setcount6=\skip2
\setskip1=.5dm2 plus\skip2 minus\count\count1fill
\multskip2 by\skip1 \advskip1 by-\skip2
\endtt
\answer Set |\count1| to 50,
then |\dimen2| to@$50\pt$,
then |\count1| to@6,
then |\skip2| to@$-10\pt$ plus@$6\,{\rm fil}$ minus@$50\pt$,
then |\skip2| to@$60\pt$ plus@$-36\,{\rm fil}$ minus@$-300\pt$,
then |\skip2| to@$1\,{\rm sp}$ minus@$-6\,{\rm sp}$,
then |\count6| to@1,
then |\skip1| to@$25\pt$ plus@$1\,{\rm sp}$ minus@$1\,{\rm fill}$,
then |\skip2| to@$25\pt$ minus@$-150\pt$,
then |\skip1| to@$0\pt$ plus@$1\,{\rm sp}$ minus@$1\,{\rm fill}$.
\dangerexercise What is in |\skip5| after the following three commands have
acted?
\begintt
\setskip5=0pt plus 1pt \advskip5 by\skip4 \advskip5 by -\skip4
\endtt
\answer If\/ |\skip4| has infinite stretchability, |\skip5| will be zero;
otherwise it will be $0\pt$ plus@$1\pt$.
\dangerexercise (For mathematicians.) Explain how to ↑{round} |\dimen2|
to the nearest multiple of\/ |\dimen3|, assuming that |\dimen3| is nonzero.
\answer |\advdimen2 by.5dm3 \divdimen2 by\dimen3 \multdimen2 by\dimen3|.
\danger The registers obey \TeX's ↑{group structure}. For example,
changes to |\count3| inside |{...}| will not affect the value of
|\count3| outside. Therefore \TeX\ effectively has more than 256
registers of each type. If you want the effect of a register command
to transcend its group, you must say |\global| just before the
|\set...|, |\adv...|, |\mult...|, or |\div...|\null\ command.
\dangerexercise What is in |\count1| after the following sequence of commands?
\begintt
\setcount1=5
{\setcount1=2 \global\advcount1by\count1\advcount1by\count1}
\endtt
\answer |\count1| takes the values 5, 2 (save@5), 4 (make this global),
8 (save@4); finally the value@4 is restored, and this is the answer.
\danger Besides the numerical registers, \TeX\ also has 256 box registers
called ↑{*box}|0| to |\box255|. A@box register gets a value when you say
↑{*setbox}\<number>|=|↑{<box}; for example, `|\setbox3=\hbox{A}|' sets
|\box3| to an hbox that contains the single letter@|A|. Several other
examples of\/ |\setbox| have already appeared in Chapter@12. Chapter@10
points out that `|2dp3|' is a \<dimen> that represents twice the depth
of\/ |\box3|.
\danger Box registers are local to groups just as arithmetic registers are.
But there's a big difference between box registers and all the rest: When
you use a |\box|, it loses its value. For example, the construction
`|\raise2pt\box3|' in a horizontal list not only puts the contents of
|\box3| into the list after raising it by@$2\pt$, it also makes |\box3|
empty. \TeX\ does this for efficiency, since it is desirable to avoid copying
the contents of potentially large boxes. If you want to use a box register
without wiping out its contents, just say `↑{*copy}' instead of `|\box|';
for example, `|\raise2pt\copy3|'.
\danger Another way to use a box register is to extract the inside of
a box by saying `↑{*unhbox}'. This annihilates the contents of the
register, like `|\box|' does, and it also removes one level of boxing. For
example, the commands
\begintt
\setbox3=\hbox{A} \setbox3=\hbox{\box3 B}
\setbox4=\hbox{A} \setbox4=\hbox{\unhbox4 B}
\endtt
put |\hbox{\hbox{A}B}| into |\box3| and |\hbox{AB}| into |\box4|.
Similarly, ↑{*unvbox} unwraps a vbox. If you want to construct a large
box by accretion (e.g., a ↑{table of contents}), it is best to use
|\unhbox| or |\unvbox| as in the |\setbox4| example; otherwise you use
more of \TeX's memory space, and you might even obtain boxes inside boxes
nested to such a deep level that hardware or software limits are exceeded.
\danger The operations ↑{*unhcopy} and ↑{*unvcopy} are related to
|\unhbox| and |\unvbox| as |\copy| is to |\box|. \ (But their
names are admittedly peculiar.)
\danger An unboxing operation ``unsets'' any glue that was set at the box's
outer level. For example, consider the sequence of commands
\begintt
\setbox5=\hbox{A \hbox{B C}} \setbox6=\hbox to 1.05wd5{\unhcopy5}
\endtt
This makes |\box6| five percent wider than |\box5|;
the glue between |A| and |\hbox{B C}| stretches to
make the difference, but the glue inside the inner hbox does not change.
\danger A box register is either ``↑{void}'' or it contains an hbox or a vbox.
There is a difference between a void register and one that contains an
empty box whose height, width, and depth are zero; for example, if\/ |\box3|
is void, you can say |\unhbox3| or |\unvbox3| or |\unhcopy3| or |\unvcopy3|,
but if\/ |\box3| is equal to |\hbox{}| you can say only |\unhbox3| or
|\unhcopy3|. If you say `|\global\setbox3=|\<box>', register |\box3| will
become ``globally void'' when it is subsequently used or unboxed.
\dangerexercise What is in register |\box5| after the following commands?
\begintt
\setbox5=\hbox{A} \setbox5=\hbox{\copy5\unhbox5\box5\copy5}
\endtt
\answer |\hbox{\hbox{A}A}|. After `|\unhbox5|', |\box5| is void; |\copy5|
yields nothing.
\dangerexercise And what is in |\box3| after this? \ (Notice the ↑{grouping}.)
\begintt
{\global\setbox3=\hbox{A} \setbox3=\hbox{B}}
\endtt
\answer |\hbox{A}|. But after `|{\global\setbox3=\hbox{A}\setbox3=\box3}|',
|\box3| will be void.
\danger If you are unsure about how \TeX\ operates on its registers, you can
experiment online by using certain `|\show|' commands. For example,
\begintt
\showthe\count1 \showthe\dimen2 \showthe\skip3
\endtt
will display the contents of\/ |\count1|, |\dimen2|, and |\skip3|; and
`↑{*showbox}|4|' will display the contents of\/ |\box4|. ↑(*showthe)
Box contents will appear only on the log file, unless you say
`|\tracingonline=1|'; plain \TeX\ provides a macro `↑{:tracingall}'
that turns on every possible mode of interaction, including
↑{*tracingonline}. The author used these features to check the answers to
several of the exercises above.
\danger The first ten |\count| registers, |\count0| through |\count9|,
are reserved for a special purpose:
\TeX\ displays these ten counts on your terminal whenever outputting
a page, and it transmits them to the output file as an identification
of that page. The counts are separated by decimal points on your terminal,
with trailing `|.0|' patterns suppressed. Thus, for example, if
|\count0=5| and |\count2=7| when a page is being shipped out to the
↑{.dvi} file, and if the other count registers are zero, \TeX\ will
type `|[5.0.7]|'. Plain \TeX\ uses |\count0| for the page number, and it
keeps |\count1| to@|\count9| zero; that is why you see just `↑{.[1]}'
when page@1 is being output. In more complex applications the page
numbers can have further structure; ten counts are shipped out
so that there will be plenty of identification.
\danger Large applications of \TeX\ make use of different sets of macros
written by different groups of people; and chaos would reign if a register
like |\count100|, say, were being used simultaneously for different purposes
in different macros. Therefore plain \TeX\ provides an allocation facility;
cooperation will replace confusion if each ↑{macro writer} uses these
conventions. The idea is to say, e.g., `↑{:newcount}' when you want
to dedicate a |\count| register to a special purpose. For example, the
author designed a macro called `|\exercise|' to format the exercises
in this manual, and one of the features of\/ |\exercise| is that it
computes the number of the current exercise. The format
macros in Appendix@E reserve a |\count| register for this purpose by saying
\begintt
\newcount\exno
\endtt
and then the command
`|\setcount\exno=0|'
is used at the beginning of each chapter. Similarly, `|\advcount\exno by1|'
is used whenever a new exercise comes along, and `|\the\count\exno|'
is used to typeset the current exercise number. The |\newcount| macro
assigns a unique number to its argument |\exno|, and all of the other
macros are written without the knowledge of exactly what that number is.
Notice that |\exno| is not the exercise number; it is the register number
where that exercise number can be found. \ (The control sequence |\exno| actually
is defined by a ↑{*chardef} instead of by a@↑{*def}; it doesn't expand
to a decimal integer. This uses \TeX's memory more efficiently, and it
prevents confusion in case |\exno| is immediately followed by another digit.)
\danger Besides |\newcount|, plain \TeX\ provides ↑{:newdimen},
↑{:newskip}, ↑{:newmuskip}, and ↑{:newbox}; there also are
↑{:newread}, ↑{:newwrite}, ↑{:newfam}, and ↑{:newinsert}, for
features we haven't discussed yet. Appendices@B and@E contain
several examples of the proper use of allocation.
\newcount\notenumber
\def\clearnotenumber{\setcount\notenumber=0}
\def\note{\advcount\notenumber by1 \footnote{$↑{\the\count\notenumber}$}}
\clearnotenumber
\dangerexercise Design a |\note| macro that produces footnotes numbered
sequentially. For example,\note{First note.} it should produce
the footnotes here\note{Second note.} if you type
\begintt
... example,\note{First note.} it should produce
the footnotes here\note{Second note.} if ...
\endtt
(Use |\newcount| to allocate a |\count| register for the footnotes.)
\answer |\newcount\notenumber|\parbreak
|\def\clearnotenumber{\setcount\notenumber=0}|\parbreak
|\def\note{\advcount\notenumber by1|\parbreak
| \footnote{$↑{\the\count\notenumber}$}}|
\danger Sometimes, however, you want to use a register just for temporary
storage, and you know that it won't conflict with anybody else's macros.
Registers |\count255|, |\dimen255|, |\skip255|, and |\muskip255| are
traditionally kept available for such purposes. Furthermore, plain \TeX\
reserves |\dimen0| to |\dimen9|, |\skip0| to |\skip9|, |\muskip0| to
|\muskip9|, and |\box0| to |\box9| for ``scratchwork''; these registers
are never allocated by the |\new...|\null\ operations. We have seen that
|\count0| through |\count9| are special, and |\box255| also turns out to
be special; so those registers should be avoided unless you know what you
are doing.
\ddanger Of course any register can be used for short-term purposes inside
a group (including |\count0| to |\count9| and |\box255|, and including
registers that have been allocated for other purposes), since register
changes are local to ↑{groups}. However, you should be sure that \TeX\
will not output any pages before the group has ended, because ↑{output
routines} might otherwise be invoked at unfortunate times. \TeX\ is
liable to invoke an output routine whenever it tries to move something
from the list of recent contributions to the current page, because it
might discover a page break with $c=\infty$ then. Here is a list of the
times when that can happen: (a)@At the beginning or end of a paragraph,
provided that this paragraph is being contributed to the main vertical
list. (b)@At the beginning or end of a displayed equation within such a
paragraph. (c)@After completing an |\halign| in vertical mode. (d)@After
contributing a box or penalty or insertion to the main vertical list.
(e)@After an |\output| routine has ended.
\ddanger Now that we are armed with the knowledge of \TeX's flexible
registers, we can plunge into the details of insertions. There are 255
classes of insertions, |\insert0| to |\insert254|, and they are tied
to other registers of the same number. For example, |\insert100| is
connected with |\count100|, |\dimen100|, |\skip100|, and |\box100|.
Therefore plain \TeX\ provides an allocation function for insertions
as it does for registers; Appendix@B includes the command
\begintt
\newinsert\footins
\endtt
↑(:newinsert)
which defines |\footins| as the number for footnote insertions. Other
commands that deal with footnotes refer to |\count\footins|, |\dimen\footins|,
and so on. The macros for floating topinserts are similarly prefaced by
`|\newinsert\topins|', which defines |\topins| as the number of their
class. Each class of insertions is independent, but \TeX\ preserves the
order of insertions within a class. It turns out that |\footins| is class@254,
and |\topins| is class@253, but the macros do not use such numbers directly.
\def\n{\thinspace$n$}
\ddanger For our purposes let's consider a particular class of insertions
called class $n$; we will then be dealing with \TeX's primitive command
↑(*insert)
$$\displaybox{|\insert|\n|{|\<vertical mode material>|}|}$$
which puts an insertion
item into a horizontal or vertical list. For this class of insertions
$$\halign{\indent#\hfil\cr
|\box|\n\ is where the material appears when a page is output;\cr
|\count|\n\ is the magnification factor for page breaking;\cr
|\dimen|\n\ is the maximum insertion size per page;\cr
|\skip|\n\ is the extra space to allocate on a page.\cr}$$
For example, material inserted with |\insert100| will
eventually appear in |\box100|.
\ddanger Let the natural height plus depth of\/ |\insert|\n\ be $x$; then
|\count|\n\ is 1000 times the factor by which $x$@affects the page
goal. For example, plain \TeX\ says |\setcount\footins=1000|, since
there is a one-to-one relationship:
a 10-point footnote effectively makes a page $10\pt$ shorter. But if we
have an application where footnotes appear in double columns, a count
value of 500 would be appropriate. One of the insertion classes in Appendix@E
makes marginal notes for proofreading purposes; in that case the
count value is zero. No actual magnification is done; |\count|\n\
is simply a number used for bookkeeping, when estimating the
costs of various page breaks.
\ddanger The first footnote on a page requires extra space, since we want
to separate the footnotes from the text, and since we want to output a
horizontal rule. Plain \TeX\ says
`|\setskip\footins=\skip\bigskipregister|'; this means that a bigskip of
extra space is assumed to be added by the output routine to any page that
contains at least one insertion of class |\footins|.
\ddanger Sometimes it is desirable to put a maximum limitation on the size
of insertions; for example, people usually don't want an entire page to
consist of footnotes. Plain \TeX\ says |\setdimen\footins=8in|; this means
that |\box\footins| is not supposed to accumulate more than 8@inches of
footnotes for any one page.
\ddanger You might want to review the page-breaking algorithm explained
at the beginning of this chapter, before reading further.
On the other hand, maybe you don't really want to read the rest of
this chapter at all, ever.
\ddanger Here now are the details of what happens when an |\insert|\n\
is moved from the ``recent contributions'' to the ``current page.'' \
(Remember that such a move does not mean that the insertion will actually take
place; the current page will be backed up later, to the breakpoint of
least cost, and only the insertions preceding that breakpoint will
actually be performed.) \ Let $g$ and $t$ be the current ↑{*pagegoal} and
↑{*pagetotal}; let $q$ be the ↑{*insertpenalties} accumulated for the
current page; and let $d$ and $z$ be the current ↑{*pagedepth} and
↑{*pageshrink}. \ (The value of@$d$ is at most ↑{*maxdepth}; this value
has not yet been incorporated into $t$.) \ Finally, let $x$ be the
natural height plus depth of the |\insert|\n\ that we are moving to the
current page; and let $f$ be the corresponding magnification factor,
i.e., |\count|\n\ divided by 1000.
\ninepoint
\textindent{\bf Step 1.} If there is no previous |\insert|\n\ on the
current page, decrease $g$ by $hf+w$, where $h$ is the current height plus
depth of\/ |\box|\n, and where $w$ is the natural space component of
|\skip|\n; also include the stretch and shrink components of
|\skip|\n\ in the totals for the current page (in particular, this
affects@$z$).
\medbreak
\textindent{\bf Step 2.} If a previous |\insert|\n\ on the current page
has been split, add the parameter called
↑{*floatingpenalty} to@$q$, and omit Steps 3 and@4.
\medbreak
\textindent{\bf Step 3.} Test if the current insertion will fit on the page
without splitting. This means that it will not make the height plus depth
of\/ |\box|\n\ surpass |\dimen|\n, when it is added to |\box|\n\ together with
all previous |\insert|\n\ amounts on the current page; furthermore, it
means that either $xf\le0$ or $t+d+xf-z\le g$. If both tests are passed,
subtract $xf$ from@$g$ and omit Step@4.
\medbreak
\textindent{\bf Step 4.} (The current insertion will be split, at least
tentatively; but the split will not actually take place if the least-cost
page turns out to have occurred earlier than the present insertion.) \
First compute the largest amount@$v$ such that a height plus depth of $v$
will not make the total insertions into |\box|\n\ bigger than |\dimen|\n,
and such that $t+d+vf\le g$. \ (Notice that $z$ is omitted from the latter
formula, but the available shrinkability was considered in Step@3 when we
tried to avoid splitting.) \ Then find the least-cost way to split the
beginning of the vertical list of the insertion so as to obtain a box of
height@$v$. \ (Use an algorithm just like page-breaking, but without the
complexity of insertion; an additional `|\penalty-10000|' item is assumed
to be present at the end of the vertical list, to ensure that a legal
breakpoint exists.) \ Let $u$ be the natural height plus depth of that
least-cost box, and let $r$ be the penalty associated with the optimum
breakpoint. Decrease $g$ by@$uf$, and increase $q$ by@$r$. \ (If
↑{*tracingpages}|=1|, the log file will now get a cryptic message that says
`|%split|\n\ $v$|,|$u$ $p$|=|$r$'; for example,
\begintt
%split254 180.2,175.3 p=100
\endtt
↑(percent split)
means that \TeX\ tried to split an |\insert254| to $180.2\pt$; the
best split was actually $175.3\pt$ tall, and the penalty for breaking
there was@100.)
\ddanger This algorithm is admittedly complicated, but no simpler mechanism
seems to do nearly as much. Notice that penalties of $-10000$ inside
insertions will make certain splits very attractive in Step@4, so the
user can provide hints about where to break, in difficult situations.
The interesting thing is that the algorithm can be adapted to behave
in a variety of different ways. Floating insertions are accommodated
as a special case of split insertions, by making each floating topinsert
start with |\penalty0|, so that it can split for free, and by having
zero as the associated |\floatingpenalty|; non-floating insertions
like footnotes are accommodated by associating larger penalties with
split insertions (see Appendix@B).
\ddanger The splitting operation mentioned in Step 4 is also available as
a primitive: `↑{*vsplit}\<number> |to|\<dimen>' produces a vbox obtained
by splitting off a speci\-fied amount of material from a box register.
For example,
\begintt
\setbox200=\vsplit100 to 50pt
\endtt
sets |\box200| to a vbox whose height is $50\pt$; it goes through the
vertical list inside |\box100| (which should be a vbox), and finds the
least-cost break assuming a goal height of@$50\pt$, considering badnesses and
penalties just as in the case of page-breaking (but with $q=0$). The
algorithm uses ↑{*splitmaxdepth} instead of\/ ↑{*maxdepth} to govern the
maximum depth of boxes. Then it prunes the top of\/ |\box100| by removing
everything up to and including any ↑{discardable} items that immediately
follow the optimum breakpoint; and it uses
↑{*splittopskip} to insert new glue at the new top of\/ |\box100|, just as
↑{*topskip} glue appears at the top of a page. However, if the optimum
breakpoint occurs at the end of the vertical list inside |\box100|---a
`|\penalty-10000|' item is assumed to be present there---or if all items
after the optimum breakpoint are discarded, |\box100| will
be void after the |\vsplit|. And if\/ |\box100| was void before the
|\vsplit|, both |\box100| and |\box200| will be void afterwards.
\ddanger You had better not change |\box|\n, |\count|\n, |\dimen|\n,
or |\skip|\n\ while \TeX\ is contributing insertions to the current page,
since the algorithm described above assumes that those quantities are static.
But you can change ↑{*floatingpenalty}, |\splittopskip|, and |\splitmaxdepth|;
\TeX\ will use the values that were current just inside the closing right
brace of `|\insert|\n|{...}|' when it splits and floats insertions.
For example, Appendix@B uses |\floatingpenalty=20000| in footnote
insertions, in order to discourage footnotes that split before others
can start; but the |\floatingpenalty| is zero in floating topinserts.
Appendix@B also uses special values of\/ |\splittopskip| and |\splitmaxdepth|,
together with ↑{struts}, so that split footnotes will be typeset with
the same spacing as unsplit ones.
\ddanger The |\footnote| macro puts an |\insert| into the horizontal list
of a paragraph. After the paragraph has been broken into lines, this
insertion will move out into the vertical list just after the line that
contained it (see Chapter@14). Since there is no legal breakpoint between
that box (i.e., that line) and the insertion, \TeX\ will put the insertion
onto the page that contains the line that contains the insertion.
\ddangerexercise Study the page-breaking algorithm carefully. Is it
possible that a footnote might not appear on the same page as its reference?
\answer Yes, in severe circumstances. (1)@If there is no other legal
breakpoint, \TeX\ will take a break whose cost is@$\infty$. (2)@If
|\vadjust{\eject}| occurs on the same line as a footnote, before that
footnote, the reference will be forcibly detached. (3)@Other |\vadjust|
commands on that line could also interpose breakpoints before the insertion.
\ddanger When the best page break is finally chosen, \TeX\ removes everything
after the chosen breakpoint from the bottom of the ``current page,''
and puts it all back at the top of the
``recent contributions.'' The
chosen breakpoint itself is placed at the very top of the recent contributions.
If it is a penalty item, the value of the penalty is recorded in
↑{*outputpenalty} and the penalty in the contribution list is changed
to $10000$; otherwise |\outputpenalty| is set to zero.
The insertions that remain on the current page are of three kinds: For
each class@$n$ there are unsplit insertions, followed possibly by
a single split insertion, followed possibly by others. The unsplit insertions
are appended to |\box|\n, with no interline glue between them. \
(↑{Struts} should be used, as in the |\vfootnote| macro of Appendix@B\null.)
If a split insertion is present, it is effectively |\vsplit| to the size
that was computed previously in Step@4; the top part is treated as an
unsplit insertion, and the remainder (if any) is converted to an insertion
as if it had not been split. This remainder, followed by any other floating
insertions of the same class, is held
over in a separate place. (They will show up on the ``current page'' if
↑{*showlists} is used while an ↑{*output} routine is active; the total
number of such insertions appears in ↑{*insertpenalties} during an
|\output| routine.) \
The non-insertion items before the best break on the current page are put
together in a |\vbox| of height@$g$, where $g$ was the |\pagegoal| at the
time of the break; this box becomes |\box255|. Now the user's ↑{*output}
↑(*box255)
routine enters \TeX's scanner (see Chapter@23); its duty is to assemble the
final pages based on the contents of\/ |\box255| and any insertion boxes
that it knows about. The output routine will probably unbox those boxes,
so that their glue can be reset; the glue in insertion boxes usually
cooperates nicely with the glue on the rest of the page, when it is
given a chance. After the |\output| routine is finished, ↑{held-over
insertion} items are placed first on the list of recent contributions, followed
by the vertical list constructed by |\output|, followed by the recent
contributions beginning with the page break. \ (Deep breath.) \ You got that?
\ddanger An insertion that appears inside of a box is dormant, but it is
not necessarily lost forever. Such an insertion comes to life if the
enclosing box is subsequently unboxed and contributed to the current page;
an output routine might do this.
\endchapter
Since it is impossible to foresee how [footnotes] will happen to come out
in the make-up, it is impracticable to number them from 1 up on each page.
The best way is to number them consecutively
throughout an article or by chapters in a book.
\author UNIVERSITY OF ↑{CHICAGO} PRESS, {\sl Manual of Style\/} (1910) % p102
\bigskip
Don't use footnotes in your books, Don.
\author JILL ↑{KNUTH} (1962)
\eject
\beginchapter Chapter 16. Typing\\Math Formulas
\TeX\ is designed to handle complex ↑{mathematical formulas} in such a way
that most of them are easy to input. The basic idea is that a complicated
formula is composed of less complicated formulas put together in a simple way;
the less complicated formulas are, in turn, made up of simple
combinations of formulas that are even less complicated; and so on. Stating
this another way, if you know how to type simple formulas and how to combine
formulas into larger ones, you will be able to handle virtually any formula
at all. So let's start with simple ones and work our way up.
The simplest formula is a single letter, like `$x$', or a single number,
like `2'. In order to put these into a \TeX\ text, you type `|$x$|' and
`|$2$|', respectively. Notice that all mathematical formulas are enclosed
in special math brackets; we are using |$| as the math bracket in this
manual, in accord with the plain \TeX\ format defined in Appendix@B\null,
because mathematics is supposedly expensive.
When you type `|$x$|' the `$x$' comes out in italics,
but when you type `|$2$|' the `$2$' comes out normally. In general, all
characters on your keyboard have a special interpretation in math
formulas, according to the normal conventions of mathematics printing:
Letters now denote ↑{italic} letters, while digits and punctuation
denote ↑{roman} digits and punctuation; a hyphen ({\tt-}) now denotes a ↑{minus
sign} ($-$), which is almost the same as an em-dash but not quite (see
Chapter@2). The first |$| that you type puts you into ``↑{math mode}'' and the
second takes you out (see Chapter@13). So if you forget one |$| or type
one |$| too many, \TeX\ will probably become thoroughly confused and you
will probably get some sort of error message. ↑(dollarsign)
Formulas that have been typeset by a printer who is unaccustomed to
mathematics usually look quite strange to a mathematician, because a
novice printer usually gets the spacing all wrong. In order to alleviate
this problem, \TeX\ does most of its own spacing in math formulas; and it
{\sl ignores\/} any ↑{spaces} that you yourself put between |$|'s. For
example, if you type `|$ x$|' and `|$ 2 $|', they will mean the same thing
as `|$x$|' and `|$2$|'. You can type `\hbox{|$(x + y)/(x - y)$|}' or
`|$(x+y) / (x-y)$|', but both will result in `$(x+y)/(x-y)$', a formula in
which there is a bit of extra space surrounding the $+$ and@$-$ signs but
none around the@/@sign. Thus, you do not have to memorize the complicated
rules of math spacing, and you are free to use blank spaces in any way you
like. Of course, spaces are still used in the normal way to mark the end
of control sequences, as explained in Chapter@7. In most circumstances
\TeX's spacing will be what a mathematician is accustomed to; but we will
see in Chapter@18 that there are control sequences by which you can
override \TeX's spacing rules if you want to.
One of the things mathematicians like to do is make their formulas look
like ↑{Greek} to the uninitiated. In plain \TeX\ language you can type
`|$$\alpha, \beta, \gamma, \delta;$$|' and you will get the first four
Greek letters ↑(:alpha)↑(:beta)↑(:gamma)↑(:delta)
$$\alpha,\beta,\gamma,\delta;$$ furthermore there are upper-case Greek
letters like `$\Gamma$', which you can get by typing `|$\Gamma$|'.
↑(:Gamma) Don't feel intimidated if you aren't already familiar with Greek
letters; they will be easy to learn if you need them. The only difficulty
is that some symbols that look nearly the same must be carefully
distinguished. For example, the Greek letters ↑{:nu}@($\nu$) and
↑{:kappa}@($\kappa$) should not be confused with the italic letters $v$
and@$x$; the Greek ↑{:phi}@($\phi$) is different from the slashed zero
called ↑{:emptyset}@($\emptyset$). A@lower-case epsilon ($\epsilon$) is
quite different from the symbol used to denote membership in a set
($\in$); type `|$\epsilon$|' for $\epsilon$ and `|$\in$|' for $\in$.
↑(:epsilon) ↑(:in) Some of the lower-case Greek letters have variant
forms in plain \TeX's math italic fonts:
`|$(\phi,\theta,\epsilon,\pi)$|' yields
`$(\phi,\theta,\epsilon,\pi)$' while
`|$(\varphi,\vartheta,\varepsilon,\varpi)$|' yields
`$(\varphi,\vartheta,\varepsilon,\varpi)$'.
↑(:phi)↑(:theta)↑(:pi)↑(:varphi)↑(:vartheta)↑(:varpi)↑(:varepsilon)
Besides Greek letters, there are a lot of ↑{funny symbols} like `$\approx$'
(which you get by typing `|$\approx$|') ↑(:approx)↑(special symbols for math)
and `$\mapsto$' (which you ↑(:mapsto)↑(math symbols)
get by typing `|$\mapsto$|').
A complete list of these control sequences
and the characters they correspond to appears in Appendix@F\null.
Such control sequences are allowed only in math mode, i.e., between
|$|'s, because the corresponding symbols appear in the math fonts.
\exercise What should you type to get the formula
`$\gamma+\nu\in\Gamma$'\thinspace?
\answer |$\gamma+\nu\in\Gamma$|.
\exercise Look at Appendix F to discover the control sequences for
`$\le$', `$\ge$', and@`$\ne$'. \ (These are probably the three most
commonly used math symbols that are not present on your keyboard.)
\ What does plain \TeX\ call them?
\answer ↑{:le}, ↑{:ge}, and ↑{:ne}. \ (These are short for ``less-or-equal,''
``greater-or-equal,'' and ``not-equal.'') \ You can also use the names
↑{:leq}, ↑{:geq}, and ↑{:neq}. \ (The fourth most common symbol is, perhaps,
`$\infty$', which stands for ``↑{infinity}'' and is called `↑{:infty}'.)
Now let's see how the more complex formulas get built up from simple ones.
In the first place, you can get ↑{superscripts} up high and ↑{subscripts}
↑(indices, see subscripts)
↑(superiors, see superscripts) ↑(inferiors, see subscripts)
down low by using `|↑|' and `|_|', as shown in the following examples:
\beginmathdemo
\it Input&\it Output\cr
\noalign{\vskip2pt}
|$x↑2$|&x↑2\cr
|$x_2$|&x_2\cr
|$2↑x$|&2↑x\cr
|$x↑2y↑2$|&x↑2y↑2\cr
|$x ↑ 2y ↑ 2$|&x ↑ 2y ↑ 2\cr
|$x_2y_2$|&x_2y_2\cr
|$_2F_3$|&_2F_3\cr
\endmathdemo
Notice that |↑| and |_| apply only to the next single character.
If you want several things to be superscripted or subscripted, just enclose
them in braces:
\beginmathdemo
|$x↑{2y}$|&x↑{2y}\cr
|$2↑{2↑x}$|&2↑{2↑x}\cr
|$2↑{2↑{2↑x}}$|&2↑{2↑{2↑x}}\cr
|$y_{x_2}$|&y_{x_2}\cr
|$y_{x↑2}$|&y_{x↑2}\cr
\endmathdemo
The braces in these examples have been used to specify ``↑{subformulas},''
i.e., simpler parts of a larger formula. \TeX\ makes a box for each
subformula, and treats that box as if it were a single symbol. Braces
also serve their usual purpose of grouping, as discussed in Chapter@5.
It is illegal to type `|x↑y↑z|' or `|x_y_z|'; \TeX\ will
complain of a ``double superscript'' or ``double subscript.'' You must type
`|x↑{y↑z}|' or `|x↑{yz}|' or
`|x_{y_z}|' or `|x_{yz}|' in order to make your intention clear.
A superscript or subscript following a character applies to that character
only; but when following a subformula it applies to that whole subformula,
and it will be raised or lowered accordingly. For example,
\beginmathdemo
|$((x↑2)↑3)↑4$|&((x↑2)↑3)↑4\cr
|${({(x↑2)}↑3)}↑4$|&{({(x↑2)}↑3)}↑4\cr
\endmathdemo
In the first formula the `|↑3|' and `|↑4|' are superscripts on the ↑{right
parentheses}, i.e., on the `|)|' characters that immediately precede them,
but in the second formula they are superscripts on the subformulas that
are enclosed in braces. The first alternative is preferable, because it is
much easier to type and it is just as easy to read.
\danger A subscript or superscript following nothing (as in the `|_2F_3|'
example on the preceding page, where the `|_2|' follows nothing) is taken
to mean a subscript or superscript of an empty subformula. Such notations
are (fortunately) rare in mathematics; but if you do encounter them it is
better to make your intention clear by showing the empty subformula
explicitly with braces. In other words, the best way to get `${}_2F_3$'
in a formula is to type `|{}_2F_3|' or `|{_2}F_3|' or `|{_2F_3}|'.
\dangerexercise What difference, if any, is there between the output of
`|$x + _2F_3$|' and the output of `|$x + {}_2F_3$|'\thinspace?
\answer In the former, the `|_2|' applies to the plus sign ($x + _2F_3$);
but in the latter, it applies to an empty subformula ($x + {}_2F_3$).
\dangerexercise Describe the differences between the outputs of `|${x↑y}↑z$|'
and `|$x↑{y↑z}$|'.
\answer The results are `${x↑y}↑z$' and `$x↑{y↑z}$'; the $z$ in the first
alternative is the same size as the $y$, but in the second it is smaller.
Furthermore, the $z$'s aren't quite at the same height. \ (Good typists never
even think of the first construction, because mathematicians never want it.)
You can have simultaneous subscripts and superscripts, and you can specify them
in any order:
\beginmathdemo
|$x↑2_3$|&x↑2_3\cr
|$x_3↑2$|&x_3↑2\cr
|$x↑{31415}_{92}+\pi$|&x↑{31415}_{92}+\pi\cr
\noalign{\smallskip}
|$x_{y↑a_b}↑{z_c↑d}$|&x_{y↑a_b}↑{z_c↑d}\cr
\endmathdemo
Notice that simultaneous su$\rm_b↑{per\kern-1pt}$scripts are positioned
over each other. However, a subscript will be ``tucked in'' slightly when it
follows certain letters; for example, `|$P_2↑2$|' produces `$P_2↑2$'.
If for some reason you want the left edges of both subscript and superscript
to be aligned, you can fool \TeX\ by making the letter into a subformula:
`|${P}_2↑2$|' produces `${P}_2↑2$'.
The control sequence ↑{:prime} stands for the symbol `$\prime$', which
is used mostly in superscripts. In fact, `$\prime$' is so big as it stands
that you would never want to use it except in a subscript or superscript,
where it occurs in a smaller size. Here are some typical examples:
\beginmathdemo
\it Input&\it Output\cr
\noalign{\vskip2pt}
|$y_1↑\prime$|&y_1↑\prime\cr
|$y_2↑{\prime\prime}$|&y_2↑{\prime\prime}\cr
|$y_3↑{\prime\prime\prime}$|&y_3↑{\prime\prime\prime}\cr
\endmathdemo
Since single and double primes occur rather frequently, plain \TeX\
provides a convenient abbreviation: You can simply type |'| instead
of |↑\prime|, and |"| instead of |↑{\prime\prime}|.
\beginmathdemo
|$f'[g(x)]g'(x)$|&f'[g(x)]g'(x)\cr
|$y_1'+y_2"$|&y_1'+y_2"\cr
|$y'_1+y"_2$|&y'_1+y"_2\cr
\endmathdemo
\exercise What happens to you if you type |$f''(x)$| instead of
|$f"(x)$|?
\answer \TeX\ complains of a double superscript, because
`|$f↑\prime↑\prime$|' is illegal. \ (If you want to type the formula
$f'{}↑{2}(x)$, the correct way is |$f'{}↑{2}(x)$|. Appendix@B doesn't
define |'| to be an abbreviation for |↑\prime{}| because that would
give the wrong result in formulas like |f'_n|.)
\dangerexercise Why do you think \TeX\ treats |\prime| as a large symbol
that appears only in superscripts, instead of making it a smaller
symbol that has already been shifted up into the superscript position?
\answer The second alternative doesn't work properly when there's a
subscript at the same time as a prime. Furthermore, some mathematicians
use |\prime| also in the subscript position; they write, for example,
$F'(w,z)=\partial F(w,z)/\partial z$ and $F_\prime(w,z)=\partial F(w,z)/
\partial w$.
\dangerexercise Mathematicians sometimes use ``↑{tensor notation}''
in which subscripts and superscripts are staggered, as in `$R_i{}↑{jk}{}_l$'.
Explain how to achieve such an effect.
\answer |$R_i{}↑{jk}{}_l$|.
Another way to get complex formulas from simple ones is to use the control
sequences ↑{:sqrt}, ↑{*underline}, or ↑{*overline}. ↑(surds, see sqrt)
Like |↑| and |_|, these operations apply to the character or subformula
that follows them:
\beginmathdemo
|$\sqrt2$|&\sqrt2\cr
|$\sqrt{x+2}$|&\sqrt{x+2}\cr
|$\underline4$|&\underline4\cr
|$\overline{x+y}$|&\overline{x+y}\cr
|$\overline x+\overline y$|&\overline x+\overline y\cr
|$x↑{\underline n}$|&x↑{\underline n}\cr
|$\sqrt{x↑3+\sqrt\alpha}$|&\sqrt{x↑3+\sqrt\alpha}\cr
\endmathdemo
You can also get cube roots `$\root3\of{\phantom{h}}$' and similar things
by using ↑{:root}:
\beginmathdemo
|$\root 3 \of 2$|&\root 3 \of 2\cr
|$\root n \of {x↑n+y↑n}$|&\root n \of {x↑n+y↑n}\cr
|$\root n+1 \of a$|&\root n+1 \of a\cr
\endmathdemo
\danger The |\sqrt| and |\underline| and |\overline| operations are able to
place lines above or below subformulas of any size or shape; the bar lines
change their size and position, so that they are long enough to cover the
subformula, and high enough or low enough not to bump into it. For example,
consider `|\overline|@|l|' ($\,\overline l\,$) versus `|\overline|@|m|'
($\,\overline m\,$): the first has a shorter bar line, and this line has
been raised higher than the bar in the second. Similarly, the bar in
`|\underline|@|y|' ($\,\underline y\,$) is lower than the bar in
`|\underline|@|x|' ($\,\underline x\,$); and square root
signs appear in a variety of positions based on the height and depth of what is
being sqrted: $\sqrt a + \sqrt d + \sqrt y$. \TeX\ knows the height, depth, and
width of every letter and every subformula, because it considers them to be
boxes, as explained in Chapter@11. If you have a formula in which there is
only one |\sqrt|, or only one |\overline| or |\underline|, the normal
positioning rules work fine; but sometimes you want to have uniformity
between different parts of a complex formula. For example, you might want to
typeset `$\sqrt{\mathstrut a}+\sqrt{\mathstrut d}+\sqrt{\mathstrut y}$',
putting all square roots in the same vertical position. There's an easy way
to do this, using the control sequence ↑{:mathstrut} as follows:
\begintt
$\sqrt{\mathstrut a}+\sqrt{\mathstrut d}+\sqrt{\mathstrut y}$.
\endtt
A |\mathstrut| is an invisible box whose width is zero; its height and depth
are the height and depth of a parenthesis `('. Therefore subformulas
that contain |\mathstrut| will always have the same height and depth,
unless they involve more complicated constructions like subscripts and
superscripts. Chapter@18 discusses more powerful operations called ↑{:smash}
and ↑{:phantom} by which you can obtain complete control over the positioning
of roots and similar signs.
\exercise Test your understanding of what you have read so far in this chapter
by explaining what should be typed to get the following formulas. \ (Be
sure to check your answer with Appendix@A to confirm that you're right.)
$$\hbox to\the\hsize{\indent$\displaystyle
10↑{10}\hfil 2↑{n+1}\hfil (n+1)↑2\hfil \sqrt{1-x↑2}\hfil
\overline{w+\overline z}\hfil p_1↑{e_1}\hfil a_{b_{c_{d_e}}}\hfil
\root3\of{h"_n(\alpha x)}\hfil$}$$
\answer |10↑{10}|; |2↑{n+1}|; |(n+1)↑2|; |\sqrt{1-x↑2}|; |\overline{w+\overline z}|;
|p_1↑{e_1}|; |a_{b_{c_{d_e}}}|; |\root3\of{h"_n(\alpha x)}|.
\ (Of course, you should enclose these
formulas in dollar signs so that \TeX\ will process them in math mode.
Superscripts and subscripts can be given in either order; for example,
|h"_n| and |h_n"| both work the same.
You should not leave out any of the braces shown here; for example,
`|$10↑10$|' would yield `$10↑10$'. But it doesn't hurt to insert additional
braces around letters or numbers, as in `|({n}+{1})↑{2}|'. The indicated
blank spaces are necessary unless you use extra braces; otherwise
\TeX\ will complain about undefined control sequences |\overlinez|
and |\alphax|.)
\exercise What mistake did B. C. ↑{Dull} discover after he typed the
following?
\begintt
If$ x = y$, then $x$ is equal to $y.$
\endtt
\answer He got `If$ x = y\ldots$' because he forgot to leave a space
after `|If|'; ↑{spaces} disappear between dollar signs. He should
also have ended the sentence with `|$y$.|'; punctuation that belongs
to a sentence should not be included in a formula, as we will see
in Chapter@18. \ (But you aren't expected to know that yet.)
\exercise Explain how to type the following sentence:
$$\displaybox{Deleting an element from an $n$-tuple leaves an $(n-1)$-tuple.}$$
\answer |Deleting an element from an $n$-tuple leaves an $(n-1)$-tuple.|
\exercise List all the italic letters that descend below the baseline.
\ (These are the letters for which |\underline| will lower its bar line.)
\answer $Q,f,g,j,p,q,y$. \ (The analogous ↑{Greek} letters are
$\beta,\gamma,\zeta,\eta,\mu,\xi,\rho,\phi,\varphi,\chi,\psi$.)
We have discussed the fact that the characters you type have special meanings
in math mode, but the examples so far are incomplete; they don't reveal all
the power that is at your fingertips just after you press the `|$|' key.
It's time now to go back to basics: Let us make a systematic survey of
what each character does, when it is used in a formula.
The 52 ↑{letters} (|A| to |Z| and |a| to |z|) all denote italic symbols
($A$ to $Z$ and $a$ to $z$), which a mathematician would call ``↑{variables}.''
\TeX\ just calls them ``↑{ordinary symbols},'' because they make up the
bulk of math formulas. There are two ways to type a lower-case L in plain \TeX,
namely `$l$' (which you get by simply typing `|l|') and `$\ell$'
(which you get by typing `↑{:ell}'). Although mathematicians commonly
write something that looks like `$\ell$' in their manuscripts, they
do so only to distinguish it from the numeral@`1'. This
distinguishability problem is not present in printed mathematics, since an
italic `$l$' is quite different from a@`1'; therefore it is traditional to
use `$l$' unless `$\ell$' has been specifically requested.
Plain \TeX\ also treats the 16 characters
\begintt
0 1 2 3 4 5 6 7 8 9 ! ? . |vrt / `
\endtt
↑(digits)↑(numerals)
as ordinary symbols; i.e., it doesn't insert any extra space when these
symbols occur next to each other or next to letters. Unlike the letters,
these 16 characters remain in roman type when they appear in formulas.
There's nothing special for you to remember about them, except that the
↑{vertical line} `\vrt' has special uses that we shall discuss later.
Furthermore, you should be careful to distinguish between `oh' and `zero':
The italic letter@$O$ is almost never used in formulas unless it appears
just before a left parenthesis, as in `$O(n)$'; ↑(big-$O$ notation)
and the numeral@$0$ is almost never used just before a left parenthesis
unless it is preceded by another digit, as in `$10(n-1)$'. Watch for
left parentheses and you'll be $0K$. \ (Lower-case o's also tend to
appear only before left parentheses; type `|x_0|' instead of `|x_o|',
since the formula `$x_0$' is generally more correct than `$x_o$'.)
The three characters |+|, |-|, and |*|
are called ``↑{binary operations},'' because they operate on two parts of
a formula. For example, |+|@is a ↑{plus sign}, which is used for the sum
of two numbers; |-|@is a ↑{minus sign}. The ↑{asterisk}@(|*|) is rarer
↑(star, see asterisk)
in mathematics, but it also behaves as a binary operation. Here are
some examples of how \TeX\ typesets binary operations when they appear
next to ordinary symbols:
\beginmathdemo
\it Input&\it Output\cr
\noalign{\vskip2pt}
|$x+y-z$|&x+y-z\cr
|$x+y*z$|&x+y*z\cr
|$x*y/z$|&x*y/z\cr
\endmathdemo
Notice that |-| and |*| produce quite different math symbols from what you
get in normal text: The ↑{hyphen}@(-) becomes a minus sign@($-$), and
the raised asterisk@(*) drops down to a lower level@($*$).
\danger \TeX\ does not treat |/| as a binary operation, even though a
↑{slash} stands for division (which qualifies as a binary operation on
mathematical grounds). The reason is that printers traditionally put extra
space around the symbols $+$, $-$, and@$*$, but not around@$/$. If \TeX\ were to
typeset |/| as a binary operation, the formula `|$1/2$|' would come out
`$1\mathbin/2$', which is wrong; so \TeX\ considers |/| to be an ordinary
symbol.
\danger Appendix F lists many more binary operations, for which you type
control sequences instead of single characters. Here are some examples:
\beginmathdemo
|$x\times y\cdot z$|&x\times y\cdot z\cr
|$x\circ y\bullet z$|&x\circ y\bullet z\cr
|$x\cup y\cap z$|&x\cup y\cap z\cr
|$x\sqcup y\sqcap z$|&x\sqcup y\sqcap z\cr
|$x\vee y\wedge z$|&x\vee y\wedge z\cr
|$x\pm y\mp z$|&x\pm y\mp z\cr
\endmathdemo
It is important to distinguish $\times$ (↑{:times}) from $X$ (|X|)
and from $x$ (|x|); to distinguish $\cup$ (↑{:cup}) from $U$ (|U|)
and from $u$ (|u|); to distinguish $\vee$ (↑{:vee}) from $V$ (|V|)
and from $v$ (|v|); to distinguish $\circ$ (↑{:circ}) from $O$ (|O|)
and from $o$ (|o|). ↑(:cdot)↑(:bullet)↑(:cap)↑(:sqcup)↑(:sqcap)↑(:wedge)
↑(cross, see dagger, times)
↑(:pm)↑(:mp) The symbols `$\lor$' and `$\land$' can also be called
↑{:lor} and ↑{:land}, since they frequently stand for binary operations
that are called ``↑{logical@or}'' and ``↑{logical@and}.''
\danger Incidentally, binary operations are treated as ordinary symbols
if they don't occur between two quantities that they can operate on.
For example, no extra space is inserted next to the $+$, $-$, and@$*$
in cases like the following:
\beginmathdemo
|$x=+1$|&x=+1\cr
|$3.142-$|&3.142-\cr
|$(D*)$|&(D*)\cr
\endmathdemo
Consider also the following examples, which show that binary
operations can be used as ordinary symbols in superscripts and subscripts:
\beginmathdemo
|$K_n↑+,K_n↑-$|&K_n↑+,K_n↑-\cr
|$z↑*_{ij}$|&z↑*_{ij}\cr
|$g↑\circ \mapsto g↑\bullet$|&g↑\circ \oldmapsto g↑\bullet\cr
|$f↑*(x) \cap f_*(y)$|&f↑*(x) \cap f_*(y)\cr
\endmathdemo
\dangerexercise How would you obtain the formulas `$z↑{*2}$'
and `$h_*'(z)$'\thinspace?
\answer |$z↑{*2}$| and |$h_*'(z)$|.
Plain \TeX\ treats the five characters |=|, |<|, |>|, |:|, and |~| as
``↑{relations}'' because they express a relationship between two
quantities. For example, `${x<y}$' means that $x$@is less than@$y$.
Such relationships have a rather different meaning from binary
operations like $+$, and they are typeset somewhat differently:
\beginmathdemo
|$x=y>z$|&x=y>z\cr
|$x:=y$|&x:=y\cr
|$x\le y\ne z$|&x\le y\ne z\cr
|$x~y\simeq z$|&x~y\simeq z\cr
|$x\equiv y\not\equiv z$|&x\equiv y\not\equiv z\cr
|$x\subset y\subseteq z$|&x\subset y\subseteq z\cr
\endmathdemo
↑(:le)↑(:ne)↑(:simeq)↑(colon)↑(equals)↑(lessthan)↑(greaterthan)↑(tilde)
↑(colonequals)↑(:equiv)↑(:not)↑(:subset)↑(:subseteq)
↑(hooks, see subset, supset) ↑(wiggle, see sim)
(The last several examples show some of the many other
relational symbols that plain \TeX\ makes available via control sequences;
see Appendix@F.)
The two characters `|,|' (↑{comma}) and `|;|' (↑{semicolon}) are treated
as ↑{punctuation marks in formulas}; this means that \TeX\ puts a little
extra space after them, but not before them.
\beginmathdemo
|$f(x,y;z)$|&f(x,y;z)\cr
\endmathdemo
It isn't customary to put extra space after a `|.|' (↑{period}) in
math formulas, so \TeX\ treats a period as an ordinary symbol.
If you want the `|:|' character to be treated as a punctuation mark
instead of as a relation, just call it ↑{:colon}:
\beginmathdemo
|$f:A\to B$|&f:A\to B\cr
|$f\colon A\to B$|&f\colon A\to B\cr
\endmathdemo
↑(:to)
If you want to use a comma as an ordinary symbol (e.g., when it
appears in a large number), just put it in braces; \TeX\ treats
anything in braces as an ordinary symbol. For instance,
\beginmathdemo
|$12,345x$|&12,345x\qquad\rm(wrong)\cr
|$12{,}345x$|&12{,}345x\qquad\,\rm(right)\cr
\endmathdemo
\dangerexercise What's an easy way to get a raised dot in a decimal
constant (e.g., `$3{\cdot}1416$')?
\answer |$3{\cdot}1416$|. \ (One of the earlier examples in this
chapter showed that ↑{:cdot} is a binary operation; putting it in braces
makes it act like an ordinary symbol.)\par
If you have lots of constants like this, for example in a table, there's a way
to make ordinary periods act like |\cdot| symbols: Just define
↑{*mathcode}|`.| to be |"0202|, assuming that the fonts of plain \TeX\ are
being used. However, this could be dangerous, since ordinary
periods are used frequently in displayed equations; the |\mathcode| change
should be confined to places where every period is to be a |\cdot|.
So far we have considered letters, other ordinary symbols, binary operations,
relations, and punctuation marks; hence we have covered almost every key on
the typewriter. There are just a few more: The characters `|(|' ↑(lparen)
and `|[|' ↑(lbracket) are called ``↑{openings},'' while `|)|' ↑(rparen) and
`|]|' ↑(rbracket) are called ``↑{closings}''; these act pretty much like
ordinary symbols, but they help \TeX\ to decide when a binary operation is
not really being used in a binary way.
Then there are the characters |'| and@|"|, which we know are
used as abbreviations for |↑\prime| and |↑{\prime\prime}|, respectively.
Finally, we know that plain \TeX\ reserves the other ten characters:
\begintt
\ $ % # & @ { } _ ↑
\endtt
These are not usable for symbols in math mode unless their ↑{*catcode}
values are changed (see Chapter@7). Although |{| and |}| specify
grouping, the control sequences `|\{|' and `|\}|' ↑(:lbrace)↑(:rbrace) can
be used to get `$\{$' as an opening and `$\}$' as a closing.
\ddanger All of these math mode interpretations are easily changeable, since
each character has a ↑{*mathcode}, as explained in Chapter@17; none of
the conventions are permanently built into \TeX. However, most of them are
so standard that it is usually unwise to make many changes, except perhaps
in the interpretations of |`| and |~|.
The special characters |↑| and |_| that designate superscripts
↑(circumflex)↑(underbar)
and subscripts should not be used except in formulas. Similarly,
the names of math symbols like |\alpha| and |\approx|, and the
control sequences for math operations like |\overline|, must not
invade ordinary text. \TeX\ uses these facts to detect ↑{missing dollar
signs} in your input, before such mistakes cause too much trouble. For
example, suppose you were to type
\begintt
The smallest $n such that $2↑n>1000$ is@10.
\endtt
\TeX\ doesn't know that you forgot a `|$|' after the first `|n|', because
it doesn't understand English; so it finds a ``formula'' between the
first two |$| signs:
$$\displaybox{The smallest $n such that $}$$
after which it thinks that `|2|' is part of the text. But then the |↑|
reveals an inconsistency; \TeX\ will automatically insert a@|$| before
the@|↑|, and you will get an error message. In this way the computer has
gotten back into synch, and the rest of the document can be typeset as if
nothing had happened.
\danger Conversely, a blank line or ↑{*par} is not permitted in math mode.
This gives \TeX\ another way to recover from a missing@|$|; such
errors will be confined to the paragraph in which they occur.
\danger If for some reason you cannot use |↑| and |_| for superscripts
and subscripts, because you have an unusual keyboard or because you need
|↑| for French accents or something, plain \TeX\ lets you type ↑{:sp}
and ↑{:sb} instead. For example, `|x\sp2|' is another way to get `$x\sp2$'.
On the other hand, some people are lucky enough to have keyboards that
contain additional symbols besides those of standard ascii.
↑(character set)
When such symbols are available, \TeX\ can be set up to make math
typing a bit more pleasant. For example, at the author's installation there
are keys labeled \up\ and@\dn\ that produce visible symbols
(these make superscripts and subscripts look much nicer
on the screen); there are keys for the relations {\tt\rlap{\char'32}<},
{\tt\rlap{\char'32}>}, and {\tt\rlap/=} (these save time); and there are
about two dozen more keys that occasionally come in handy.
↑(uparrow)↑(downarrow)↑(leq)↑(geq)↑(neq)
\danger Mathematicians are fond of using ↑{accents} over letters, because
this is often an effective way to indicate relationships between
mathematical objects, and because it greatly extends the number of available
symbols without increasing the number of necessary fonts.
Chapter@9 discusses the use of accents in ordinary text, but mathematical
accents are somewhat different, because spacing is not the same; \TeX\ uses
special conventions for accents in formulas, so that the two sorts of
accents will not be confused with each other. The following math accents
are provided by plain@\TeX:
\beginmathdemo
|$\hat a$|&\hat a\cr
|$\check a$|&\check a\cr
|$\tilde a$|&\tilde a\cr
|$\dot a$|&\dot a\cr
|$\ddot a$|&\ddot a\cr
|$\breve a$|&\breve a\cr
|$\bar a$|&\bar a\cr
|$\vec a$|&\vec a\cr
\endmathdemo
↑(:hat)↑(:check)↑(:tilde)↑(:dot)↑(:ddot)↑(:breve)↑(:bar)↑(:vec)
The first seven of these are called |\↑|, |\v|, |\~|, |\.|, |\"|, |\u|,
and |\=|, respectively, when they appear in text; |\vec| is an accent
that appears only in formulas. \TeX\ will complain if you try to use
|\↑| or |\v|, etc., in formulas, or if you try to use |\hat| or |\check|, etc.,
in ordinary text.
\danger The problem with math accents is that a bit of finesse is necessary
to get them positioned just right; different characters look best with the
accent shifted in different ways, based on the shape of the character.
\TeX\ doesn't have enough information to do this, so it simply centers
the accent in each case. This looks pretty funny on some letters;
for example, `|$\hat A$|' and `|$\hat f$|' come out `$\hat A$'
and@`$\hat f$'. \ (You might, in fact, think of another adjective besides
``funny'' to describe such results.) \ Therefore plain \TeX\ provides
a control sequence called ↑{:skew} that makes it fairly easy to put
accents in their proper place. If you write `|$\skew5\hat A$|', for example,
you get `$\skew5\hat A$', which looks much better. The number following
|\skew| specifies a relative amount by which the accent is to be shifted
right; for example, `|$\skew3\hat A$|' and `|$\skew7\hat A$|' come out
looking like `$\skew3\hat A$' and `$\skew7\hat A$'. By fiddling with the
amount of skew you can find the setting that pleases you best. Here is
a formula that shows the author's recommendations for all the italic
letters:
\def\\#1#2{\setbox0=\hbox{$#2_{\hskip\minusthe\scriptspace}$}
\hbox to 1wd0{$\skew#1\hat#2$\hss}{}_#1}
$$\vbox{\leftskip\the\parindent \noindent
$\\5A+\\3B+\\3C+\\2D+\\3E+\\3F+\\3G+\\3H+\\4I+\\6J+\\2K+\\1L+\\3M+\\3N
+\\3O+\\3P+\\3Q+\\3R+\\3S+\\3T+\\1U+\\0V+\\0W+\\3X+\\0Y+\\3Z
+\\0a+\\0b+\\2c+\\6d+\\2e+\\6f+\\1g+\\0h+\\1\imath+\\3\jmath+\\0k+\\3l+\\4\ell
+\\0m+\\0n+\\2o+\\3p+\\3q+\\2r+\\2s+\\3t+\\1u+\\1v+\\3w+\\1x+\\2y+\\2z
+\\3{{\mit\Gamma}}+\\6{{\mit\Delta}}+\\3{{\mit\Theta}}+\\6{{\mit\Lambda}}
+\\3{{\mit\Xi}}+\\3{{\mit\Pi}}+\\3{{\mit\Sigma}}+\\2{{\mit\Upsilon}}
+\\3{{\mit\Phi}}+\\2{{\mit\Psi}}+\\3{{\mit\Omega}}
+\\1\alpha+\\3\beta+\\0\gamma+\\2\delta+\\2\epsilon+\\2\varepsilon
+\\3\zeta+\\2\eta+\\3\theta+\\3\vartheta
+\\2\iota+\\0\kappa+\\0\lambda+\\1\mu+\\1\nu+\\4\xi+\\0\pi+\\0\varpi
+\\3\rho+\\0\sigma+\\1\tau+\\1\upsilon+\\3\phi+\\3\varphi+\\2\chi
+\\4\psi+\\0\omega$.}$$
The subscript on each letter shows the amount of skew that was used;
for example, `$\\3B$' means that `$\skew3\hat B$' is the result of
`|$\skew3\hat B$|'. The same skews work with |\tilde| and the other math
accents, as well as they do with |\hat|.
\danger Notice that ↑{dotless} $i$ and $j$ were used in the symbols
`$\skew1\hat\imath$' and `$\skew3\hat\jmath$'. To get these symbols,
the author typed `|$\skew1\hat\imath$|' and `|$\skew3\hat\jmath$|'.%
↑(:imath)↑(:jmath)
\danger It's generally best to define special control sequences for
the accented letters that you need. For example, you should insert
↑(*def)
\begintt
\def\Ahat{\skew5\hat A}
\def\chat{\skew2\hat c}
\def\scheck{\skew3\check s}
\def\xtilde{\skew1\tilde x}
\def\zbar{\skew2\bar z}
\endtt
at the beginning of a manuscript that uses the symbols $\skew5\hat A$,
$\skew2\hat c$, $\skew3\check s$, $\skew1\tilde x$, and $\skew2\bar z$.
This approach works far better than typing |\skew5\hat A| whenever
you want $\skew5\hat A$ in a formula, because it saves you a lot of
keystrokes, and especially because you or your editor might decide at some
late date to change to some other style of type. If the type style needs
to be changed, another skew might turn out to be better; but the change
will be simple, if you have used control sequences like |\Ahat|, because
you will need to fix only a few definitions instead of dozens of formulas.
\def\Ahat{\skew5\hat A}
\danger Accented letters can cause superscripts to be shifted too high,
because \TeX\ bases the height of a superscript on the size of whatever is
being superscripted. An accented character is taller than an unaccented
one, so its superscript may go up a bit more. For example, `|$\hat A↑2$|'
produces `$\hat A↑2$', which is wrong on two counts: not only is the hat
in the wrong position, the@2 is too high. If you try `|$\hat{A↑2}$|' the
result is wrong in another way, because this puts an accent over the
subformula `|A↑2|': you@get@`$\hat{A↑2}$'. The |\skew| control sequence
takes care of this by putting an empty subformula `|{}|' after what is
accented. Thus, for example, `|$\Ahat↑2$|' will give the proper
output@`$\Ahat↑2$', if\/ |\Ahat| has been defined as suggested above.
\danger You can put accents on top of accents, making symbols like
$\skew6\hat{\skew5\hat A}$ that might cause a mathematician to
squeal with ecstasy. It is generally best to do this by adding one
unit of skew to the upper accent, unless the letters aren't slanted.
For example, `|$\skew6\hat{\skew5\hat A}$|' was used to produce the symbol
above; `|$\skew2\bar{\skew1\tilde x}$|' yields
`$\skew2\bar{\skew1\tilde x}$'; and
`|$\bar{\bar{\bf x}}$|' yields@`$\bar{\bar{\bf x}}$'.
\danger It's possible, in fact, to put math accents on any subformula,
not just on single characters. But there's not much point in doing so,
because \TeX\ just centers the accent over the whole subformula. For
example, `|$\hat{I+M}$|' yields `$\hat{I+M}$'. In particular, a |\bar|
accent always stays the same size; it's not like ↑{*overline}, which
grows with the formula under it. Some people prefer the longer line
from |\overline| even when it applies to only a single letter;
for example, `|$\skew2\bar|@|z+\overline|@|z$|' produces
`$\skew2\bar z+\overline z$', and you can take your pick.
However, plain \TeX\ does provide two accents that grow;
they are called ↑{:widehat} and ↑{:widetilde}:
\beginmathdemo
|$\widehat x,\widetilde x$|&\tenmath\widehat x,\widetilde x\cr
|$\widehat{xy},\widetilde{xy}$|&\tenmath\widehat{xy},\widetilde{xy}\cr
|$\widehat{xyz},\widetilde{xyz}$|&\tenmath\widehat{xyz},\widetilde{xyz}\cr
\endmathdemo
The third example here shows the maximum size available.
\def\ghat{\skew1\hat g}
\exercise This has been another long chapter; but cheer up, you have learned
a lot! Prove it by explaining what to type in order to get the formulas
$e↑{-x↑2}$, $D~p↑\alpha M+l$, and $\ghat\in(H↑{\pi_1↑{-1}})'$. \ (In
the last example, assume that a control sequence |\ghat| has already been
defined, so that |\ghat| produces the accented letter $\ghat$.)
\answer |$e↑{-x↑2}$|, |$D~p↑\alpha M+l$|, and |$\ghat\in(H↑{\pi_1↑{-1}})'$|.
\ (If you are reading the dangerous bend sections, you know that the
recommended way to define |\ghat| is `|\def\ghat{\skew1\hat g}|'.)
\endchapter
Producing ↑{Greek} letters is as easy as $\pi$.
You just type |... as easy as $\pi$.|
\author LESLIE ↑{LAMPORT}, {\sl The ↑{L\kern-.2em\raise.9ex\hbox{a}%
\kern-.2em\TeX} Document Preparation System\/} (1983)
\bigskip
\TeX\ has no regard for the glories of the Greek tongue---\/
as far as it is concerned, Greek letters are just additional weird symbols,
and they are allowed\/ {\rm only} in math mode.
In a pinch you can get the output $\tau\epsilon\chi$ by typing %
|$\tau\epsilon\chi$|,
but if you're actually setting Greek text, you will be using
a different version of \TeX, designed for a keyboard with Greek letters on it,
and you shouldn't even be reading this manual, %
which is undoubtedly all English to you.
\author MICHAEL ↑{SPIVAK}, {\sl The Joy of \TeX\/} (1982)
\eject
\beginchapter Chapter 17. More about Math
Another thing mathematicians like to do is make fractions---and they
like to build symbols up on top of each other in a variety of different ways:
$$\display{1\over2}\qquad{\rm and}\qquad{n+1\over3}\qquad{\rm and}\qquad
{n+1\choose3}\qquad{\rm and}\qquad\sum_{n=1}↑3 Z_n\,.$$
You can get these four formulas as displayed equations by typing
`|$$1\over2$$|' and
`|$$n+1\over3$$|' and
`|$$n+1\choose3$$|' and
`|$$\sum_{n=1}↑3 Z_n$$|';
we shall study the simple rules for such constructions in this chapter.
↑(:sum)↑(:choose)
First let's look at ↑{fractions}, which use the `↑{*over}' notation. The
control sequence |\over| applies to everything in the formula unless you
use braces to enclose it in a specific subformula; in the latter
↑(stacked fractions, see over)
case, |\over| applies to everything in that subformula.
\begindisplaymathdemo
\it Input&\it Output\cr
\noalign{\vskip-3pt}
|$$x+y↑2\over k+1$$|&x+y↑2\over k+1\cr
\noalign{\vskip2pt}
|$${x+y↑2\over k}+1$$|&{x+y↑2\over k}+1\cr
\noalign{\vskip-1pt}
|$$x+{y↑2\over k}+1$$|&x+{y↑2\over k}+1\cr
\noalign{\vskip-1pt}
|$$x+{y↑2\over k+1}$$|&x+{y↑2\over k+1}\cr
\noalign{\vskip-3pt}
|$$x+y↑{2\over k+1}$$|&x+y↑{2\over k+1}\cr
\endmathdemo
You aren't allowed to use |\over| twice in the same subformula; instead of
typing something like
`|a \over b \over 2|', you must specify what goes over what:
\begindisplaymathdemo
\noalign{\vskip3pt}
|$${a\over b}\over 2$$|&{a\over b}\over 2\cr
|$$a\over{b\over 2}$$|&a\over{b\over 2}\cr
\endmathdemo
Unfortunately, both of these alternatives look pretty awful. Mathematicians
tend to ``overuse'' |\over| when they first begin to typeset their own work
on a system like \TeX. A good typist or copy editor will convert fractions
to a ``↑{slashed form},'' whenever a built-up construction would be too
small or too crowded. For example, the last two cases should be treated
as follows:
\begindisplaymathdemo
\noalign{\vskip3pt}
|$$a/b \over 2$$|&a/b \over 2\cr
|$$a \over b/2$$|&a \over b/2\cr
\endmathdemo
Conversion to slashed form takes a little bit of mathematical knowhow, since
↑{parentheses} sometimes need to be inserted in order to preserve the meaning
of the formula. Besides substituting `|/|' for@`|\over|', the two parts
of the fraction should be put in parentheses unless they are single
symbols; for example, $a\over b$@becomes simply@$a/b$, but
$a+1\over b$ becomes $(a+1)/b$, and $a+1\over b+1$ becomes
${(a+1)/(b+1)}$. Furthermore, the entire fraction should generally
be enclosed in parentheses if it appears next to something else;
for example, ${a\over b}x$ becomes $(a/b)x$. If you are a typist without
mathematical training, it's best to ask the author of the manuscript
for help, in doubtful cases; you might also tactfully suggest that
unsightly fractions be avoided altogether in future manuscripts.
\exercise What's a better way to render the formula $x+y↑{2\over k+1}$?
\answer $x+y↑{2/(k+1)}$\quad(|$x+y↑{2/(k+1)}$|).
\exercise Convert `${a+1\over b+1}x$' to slashed form.
\answer $((a+1)/(b+1))x$\quad(|$((a+1)/(b+1))x$|).
\exercise What surprise did B. L. ↑{User} get when he typed `|$$x=(y↑2\over
k+1)$$|'\thinspace?
\answer He got the displayed formula$$x=(y↑2\over k+1)$$ because he forgot
that an unconfined |\over| applies to everything. \ (He should probably
have typed `|$$x=\left(y↑2\over k+1\right)$$|', using ideas that will be
presented later in this chapter; this not only makes the parentheses
larger, it keeps the `$x=$' out of the fraction, because |\left| and
|\right| introduce subformulas.)
\def\cents{\hbox{\rm\rlap/c}}
\exercise How can you make `$7{1\over2}\cents$'? \ (Assume that
the control sequence |\cents| yields@`$\cents$'.)↑(money)↑(cents)
\answer `|$7{1\over2}\cents$|' or `|7$1\over2$\cents|'. \ (Incidentally,
the definition used here was |\def\cents{\hbox{\rm\rlap/c}}|.) ↑(:rlap)
The examples above show that letters and other symbols sometimes get
smaller when they appear in fractions, just as they get smaller when they
are used as exponents. It's about time that we studied \TeX's method for
choosing the sizes of things. \TeX\ actually has eight different
↑{styles} in which it can treat formulas, namely
$$\halign{\indent#\hfil\quad\hfil\cr
display style&(for formulas displayed on lines by themselves)\cr
text style&(for formulas embedded in the text)\cr
script style&(for formulas used as superscripts or subscripts)\cr
scriptscript style&(for second-order superscripts or subscripts)\cr}$$
↑(display style)↑(text style)↑(script style)↑(scriptscript style)
and four other ``↑{cramped}'' styles that are almost the same except that
exponents aren't raised quite so much. For brevity we shall refer to the
eight styles as
$$\display
D,\ D',\ T,\ T',\ S,\ S',\ \SS,\ \SS',$$
where $D$ is display style, $D'$ is cramped display style, $T$@is text style,
etc. \TeX\ also uses three different ↑{sizes of type for mathematics};
they are called ↑{text size}, ↑{script size}, and ↑{scriptscript size}.
The normal way to typeset a formula with \TeX\ is to enclose it in dollar
signs |$|$\,\ldots\,$|$|; this yields the formula in text style
(style@$T$). Or you can enclose it in double dollar signs |$$|$\,\ldots\,$|$$|;
this displays the formula in display style (style@$D$). The subformulas of
a formula might, of course, be in different styles. Once you know
the style, you can determine the size of type that \TeX\ will use:
$$\displayvbox{\halign{#\hfil\qquad\hfil&\quad#(like this)\hfil\cr
If a letter is in style&then it will be set in\cr
\noalign{\vskip 2pt}
$D,D',T,T'$&text size&\cr
$S,S'$&script size&\sevenrm\cr
$\SS,\SS'$&scriptscript size&\fiverm\cr}}$$
There is no ``$\it SSS$'' style or ``scriptscriptscript'' size; such tiny
symbols would be even less readable than the scriptscript ones. Therefore
\TeX\ stays with scriptscript size as the minimum:
$$\displayvbox{\halign{\hbox to 1.3in{#\hfil}&\hbox to 1.2in{#\hfil}\hfil\cr
In a formula&the superscript&and the subscript\cr
of style&style is&style is\cr
\noalign{\vskip 2pt}
$D,T$&$S$&$S'$\cr
$D',T'$&$S'$&$S'$\cr
$S,\SS$&$\SS$&$\SS'$\cr
$S',\SS'$&$\SS'$&$\SS'$\cr}}$$
For example, if |x↑{a_b}| is to be typeset in style $D$, then |a_b| will
be set in style@$S$, and {\tt b}@in style@$\SS'$; the result is
`$\displaystyle x↑{a_b}$'.
So far we haven't seen any difference between styles $D$ and $T$. Actually
there is a slight difference in the positioning of exponents, although
script size is used in each case: you get
$\displaystyle x↑2$@in $D$@style and $x↑2$@in $T$@style and \vbox to 0pt{
\vss\hbox{$\displaystyle{\atop x↑2}$}\kern0pt}@in $D'$ or $T'$@style---do
you see the difference? But there is a big distinction between $D$ style and
$T$ style when it comes to fractions:
$$\displayvbox{\halign{\hbox to 1.3in{#\hfil}&\hbox to 1.2in{#\hfil}\hfil\cr
In a formula&the style of the&and the style of the\cr
$\alpha$|\over|$\,\beta$ of style&numerator $\alpha$ is&denominator
$\beta$ is\cr
\noalign{\vskip 2pt}
$D$&$T$&$T'$\cr
$D'$&$T'$&$T'$\cr
$T$&$S$&$S'$\cr
$T'$&$S'$&$S'$\cr
$S,\SS$&$\SS$&$\SS'$\cr
$S',\SS'$&$\SS'$&$\SS'$\cr}}$$
↑(numerator)↑(denominator)
Thus if you type `|$1\over2$|' (in a text) you get $1\over2$, namely style
$S$ over style@$S'$; but if you type
`|$$1\over2$$|' you get $$1\over2$$ (a displayed formula), which is style
$T$ over style $T'$.
\danger While we're at it, we might as well finish the style rules:
↑{*underline} does not change the style. ↑{Math accents}, and the operations
↑{*sqrt} and ↑{*overline}, change uncramped styles to their cramped
counterparts; for example, $D$ changes to $D'$, but $D'$ stays as it was.
\dangerexercise State the style and size of each part of the formula
$\displaystyle \sqrt{p_2↑{e'}}$, assuming that the formula itself is in
style@$D$.
\answer Style $D'$ is used for the subformula $p_2↑{e'}$, hence style@$S'$
is used for the superscript@$e'$ and the subscript@2, and style@$\SS'$
is used for the supersuperscript@$\prime$. The square root sign and the $p$ appear
in text size; the 2 and the@$e$ appear in script size; and the $\prime$
is in scriptscript size.
Suppose you don't like the style that \TeX\ selects by its automatic style
rules. Then you can specify the style you want by typing ↑{*displaystyle}
or ↑{*textstyle} or ↑{*scriptstyle} or ↑{*scriptscriptstyle}; the style
that you select will apply until the end of the formula or subformula, or
until you select another style. For example, the input
`|$$n+\scriptstyle n+\scriptscriptstyle n.$$|' produces the display
$$n+\scriptstyle n+\scriptscriptstyle n.$$
This is a rather silly example, but it does show
that the plus signs get smaller too, as the style changes. \TeX\ puts no
space around + signs in script style.
Here's a more useful example of style changes: Sometimes you need to
typeset a ``↑{continued fraction}'' made up of many other fractions,
all of which are supposed to be in display style:
$$a_0+{1\over\displaystyle a_1+
{\strut 1\over\displaystyle a_2+
{\strut 1\over\displaystyle a_3+
{\strut 1\over a_4}}}}$$
In order to get this effect, the idea is to type
\begintt
$$a_0+{1\over\displaystyle a_1+
{\strut 1\over\displaystyle a_2+
{\strut 1\over\displaystyle a_3+
{\strut 1\over a_4}}}}$$
\endtt
(The control sequence ↑{:strut} has been used to make the denominators
taller; this is a refinement that will be discussed in
Chapter@18. Our concern now is with the style commands.) \
Without the appearances of\/ |\strut| and |\displaystyle| in this formula,
the result would be completely different:
$$a_0+{1\over a_1+{1\over
a_2+{1\over a_3+{1\over a_4}}}}$$
\danger These examples show that the numerator and denominator of a fraction
are generally centered with respect to each other. If you prefer to have
the numerator or denominator appear ↑{flush left}, put `↑{*hfill}' after
it; or if you prefer ↑{flush right}, put `|\hfill|' at the left. For
example, if the first three appearances of `|1\over|' in the previous
example are replaced by `|1\hfill\over|', you get the display
$$a_0+{1\hfill\over\displaystyle a_1+
{\strut1\hfill\over\displaystyle a_2+
{\strut1\hfill\over\displaystyle a_3+
{\strut1\over a_4}}}}$$
(a format for continued fractions that many authors prefer). This works
because |\hfill| stretches at a faster rate than the glue that is
actually used internally by \TeX\ when it centers the numerators
and denominators.
\TeX\ has another operation `↑{*atop}', which is like |\over| except that
it leaves out the fraction line:
\begindisplaymathdemo
|$$x\atop y+2$$|&x\atop y+2\cr
\endmathdemo
The plain \TeX\ format in Appendix@B also defines `↑{:choose}', which is
like |\atop| but it encloses the result in parentheses:
\begindisplaymathdemo
|$$n\choose k$$|&n\choose k\cr
\endmathdemo
It is called |\choose| because it's
a common notation for the so-called ``↑{binomial coefficient}''
that tells how many ways there are to choose $k$@things out of $n$@things.
You can't mix |\over| and |\atop| and |\choose| with each other.
For example, `|$$n \choose k \over 2$$|' is illegal; you must use
grouping, to get either `|$${n\choose k}\over2$$|' or
`|$$n\choose{k\over2}$$|', i.e.,
$$\display{{n\choose k}\over2}\qquad{\rm or}\qquad
{n\choose{k\over2}}.$$
The latter formula, incidentally, would look better as
`|$$n\choose k/2$$|' or `|$$n\choose{1\over2}k$$|', yielding
$$\display{n\choose k/2}\qquad{\rm or}\qquad{n\choose{1\over2}k}.$$
\medskip
\exercise As alternatives to $\displaystyle{{n\choose k}\over2}$,
discuss how you could obtain the two displays
$$\abovedisplayskip=0pt\belowdisplayskip=0pt\display
{1\over2}{n\choose k}
\qquad{\rm and}\qquad
{\displaystyle{n\choose k}\over2}.$$
\answer |$${1\over2}{n\choose k}$$|;
|$$\displaystyle{n\choose k}\over2$$|.
All of these braces are necessary.
\bigbreak
\exercise Explain how to specify the displayed formula
$${p \choose 2}x↑2 y↑{p-2} - {1 \over 1-x}{1 \over 1-x↑2}.$$
\answer |$${p \choose 2} x↑2 y↑{p-2} - {1 \over 1-x}{1 \over 1-x↑2}.$$|
\danger \TeX\ has a generalized version of\/ |\over| and |\atop| in which you
specify the exact thickness of the line rule by typing
`↑{*above}\<dimen>'. For example,
\begintt
$$\displaystyle{a\over b}\above1pt\displaystyle{c\over d}$$
\endtt
will produce a ↑{compound fraction} with a heavier ($1\pt$ thick) rule as
its main bar:
$${\displaystyle{a\over b}\above 1pt\displaystyle{c\over d}}.$$
This sort of thing occurs primarily in textbooks on elementary mathematics.
\goodbreak
Mathematicians often use the sign $\sum$ to stand for ``↑{summation}''
and the sign $\int$ to stand for ``↑{integration}.'' If you're a typist but not
a mathematician, all you need to remember is that ↑{:sum} stands for
$\sum$ and ↑{:int} for $\int$; these abbreviations appear in Appendix@F
together with all the other symbols, in case you forget. Symbols like
$\sum$ and $\int$ (and a few others like $\union$ and $\prod$ and $\oint$
and@$\oprod$, all listed in Appendix@F) are called {\sl ↑{large operators}},
↑(collective signs, see large operators)
and you type them just as you type ordinary symbols or letters. The
difference is that \TeX\ will choose a {\sl larger\/} large operator in
display style than it will in text style. For example,
$$\halign{\indent#\hfil\qquad yields\qquad&$#\hfil$\qquad\hfil\cr
|$\sum x_n$|&\sum x_n&($T$ style)\cr
\noalign{\vskip3pt}
|$$\sum x_n$$|&\displaystyle\sum x_n&($D$ style).\cr}$$
A displayed |\sum| usually occurs with ``↑{limits},'' i.e., with
subformulas that are to appear above and below it. You type limits just
as if they were superscripts and subscripts; for example, if you want
$$\sum_{n=1}↑m$$
you type either `|$$\sum_{n=1}↑m$$|' or `|$$\sum↑m_{n=1}$$|'. According
to the normal conventions of mathematical typesetting, \TeX\ will change
this to `$\sum_{n=1}↑m$' (i.e., without limits) if it occurs in text
style rather than in display style.
Integrations are slightly different from summations, in that the superscripts
and subscripts are not set as limits even in display style:
$$\halign{\indent\hbox to2.3in{#\hfil}\hbox to.6in{yields\hfil}&
$#\hfil$\qquad\hfil\cr
|$\int_{-\infty}↑{+\infty}$|&\int_{-\infty}↑{+\infty}&($T$ style)\cr
\noalign{\vskip3pt}
|$$\int_{-\infty}↑{+\infty}$$|&\displaystyle\int_{-\infty}↑{+\infty}&
($D$ style).\cr}$$
\danger Some printers prefer to set limits above and below $\int$ signs;
this takes more space on the page, but it
gives a better appearance if the subformulas are complex, because it
keeps them out of the way of the rest of the formula. Similarly, limits
are occasionally desirable in text style or script style; but some
printers prefer not to set limits on displayed $\sum$ signs. You can change
\TeX's convention by simply typing `↑{*limits}' or `↑{*nolimits}' immediately
after the large operator.
For example,
$$\halign{\indent\hbox to2.3in{#\hfil}\hbox to.6in{yields\hfil}&
$\displaystyle{#}$\hfil\cr
|$$\int\limits_0↑{\pi\over2}$$|&\int\limits_0↑{\pi\over2}\cr
\noalign{\vskip 4pt}
|$$\sum\nolimits_{n=1}↑m$$|&\sum\nolimits_{n=1}↑m\cr}$$
\ddanger If you say `|\nolimits\limits|' (presumably because some macro
like |\int| specifies |\nolimits|, but you do want them), the last word
takes precedence. There's also a command `↑{*displaylimits}' that can be
used to restore \TeX's normal conventions; i.e., the limits will be
displayed only in styles $D$ and $D'$.
\danger Sometimes you need to put two or more rows of limits under a large
operator; you can do this with `↑{*atop}'. For example, if you want
the displayed formula
$$\sum_{\scriptstyle0\le i\le m\atop\scriptstyle0<j<n}P(i,j)$$
the correct way to type it is
\begintt
$$\sum_{\scriptstyle0\le i\le m\atop\scriptstyle0<j<n}P(i,j)$$
\endtt
(perhaps with a few more spaces to make it look nicer in the manuscript
file). The instruction `↑{*scriptstyle}' was necessary here,
twice---otherwise the lines `$0\le i\le m$' and `$0<j<n$' would have been in
scriptscript size, which is too small. This is another instance of a rare
case where \TeX's automatic style rules need to be overruled.
\exercise How would you type the displayed formula $\displaystyle
\sum_{i=1}↑p\sum_{j=1}↑q\sum_{k=1}↑ra_{ij}b_{jk}c_{ki}$\enspace?
\answer |$$\sum_{i=1}↑p\sum_{j=1}↑q\sum_{k=1}↑ra_{ij}b_{jk}c_{ki}$$|.
\dangerexercise And how would you handle $\displaystyle
\sum_{{\scriptstyle1\le i\le p\atop\scriptstyle1\le j\le q}
\atop\scriptstyle1\le k\le r}a_{ij}b_{jk}c_{ki}$\enspace?
\answer |$$\sum_{{\scriptstyle 1\le i\le p \atop \scriptstyle 1\le j\le q}
\atop \scriptstyle 1\le k\le r} a_{ij} b_{jk} c_{ki}$$|.
Since mathematical formulas can get horribly large, \TeX\ has to have some
way to make ever-larger symbols. For example, if you type
\begintt
$$\sqrt{1+\sqrt{1+\sqrt{1+
\sqrt{1+\sqrt{1+\sqrt{1+\sqrt{1+x}}}}}}}$$
\endtt
the result shows a variety of available ↑{square-root signs}:
$$\display\sqrt{1+\sqrt{1+\sqrt{1+
\sqrt{1+\sqrt{1+\sqrt{1+\sqrt{1+x}}}}}}}$$
The three largest signs here are all essentially the same, except for a
vertical segment `\vbox{\hbox{\tenex\char'165}\vss}' that gets repeated as
often as necessary to reach the desired size; but the smaller signs are
distinct characters found in \TeX's math fonts.
A similar thing happens with parentheses and other so-called
``↑{delimiter}'' symbols. For example, here are some of the different sizes of
↑(fences, see delimiters)
↑{parentheses} and ↑{braces} that plain \TeX\ might use in formulas:
$$\display\left(\vbox to 27pt{}\left(\vbox to 24pt{}\left(\vbox to 21pt{}
\Biggl(\biggl(\Bigl(\bigl(({\scriptstyle({\scriptscriptstyle(\hskip3pt
)})})\bigr)\Bigr)\biggr)\Biggr)\right)\right)\right)
\left\{\vbox to 27pt{}\left\{\vbox to 24pt{}\left\{\vbox to 21pt{}
\Biggl\{\biggl\{\Bigl\{\bigl\{\{{\scriptstyle\{{\scriptscriptstyle\{\hskip3pt
\}}\}}\}\bigr\}\Bigr\}\biggr\}\Biggr\}\right\}\right\}\right\}$$
The three largest pairs in each case are made with repeatable extensions,
so they can become as large as necessary.
↑(pieces of symbols)
Delimiters are important to mathematicians, because they provide good
visual clues to the underlying structure of complex expressions; they delimit
the boundaries of individual subformulas. Here is a list of the 22@basic
delimiters provided by plain \TeX:
$$\halign{\indent#\hfil\qquad\hfil\qquad&$#\hfil$\cr
\it Input&\it Delimiter\cr
\noalign{\vskip2pt}
|(|&left parenthesis: $($\cr
|)|&right parenthesis: $)$\cr
|[| or |\lbrack|&left bracket: $[$\cr
|]| or |\rbrack|&right bracket: $]$\cr
|\{| or |\lbrace|&left curly brace: $\{$\cr
|\}| or |\rbrace|&right curly brace: $\}$\cr
|\lfloor|&left floor bracket: $\lfloor$\cr
|\rfloor|&right floor bracket: $\rfloor$\cr
|\lceil|&left ceiling bracket: $\lceil$\cr
|\rceil|&right ceiling bracket: $\rceil$\cr
|\langle|&left angle bracket: $\langle$\cr
|\rangle|&right angle bracket: $\rangle$\cr
|/|&slash: $/$\cr
|\backslash|&reverse slash: $\backslash$\cr
\vrt\ or |\vert|&vertical bar: $\vert$\cr
|\|\vrt\ or |\Vert|&double vertical bar: $\Vert$\cr
|\uparrow|&upward arrow: $\uparrow$\cr
|\Uparrow|&double upward arrow: $\Uparrow$\cr
|\downarrow|&downward arrow: $\downarrow$\cr
|\Downarrow|&double downward arrow: $\Downarrow$\cr
|\updownarrow|&up-and-down arrow: $\updownarrow$\cr
|\Updownarrow|&double up-and-down arrow: $\Updownarrow$\cr
}$$
↑(:lbrack)↑(:rbrack)↑(:lbrace)↑(:rbrace)↑(:lfloor)↑(:rfloor)↑(:lceil)↑(:rceil)
↑(:langle)↑(:rangle)↑(:backslash)↑(:vert)↑(:Vert)↑(:uparrow)↑(:Uparrow)
↑(:downarrow)↑(:Downarrow)↑(:updownarrow)↑(:Updownarrow)
↑(bent bars, see langle, rangle) ↑(curly braces, see lbrace, rbrace)
↑(leftbracket)↑(rightbracket)↑(leftbrace)↑(rightbrace)↑(/)
In some cases, there are two ways to get the same delimiter; for example,
you can specify a left bracket by typing either `|[|' or `|\lbrack|'. The
latter alternative has been provided because the symbol `|[|' is not
readily available on all computer keyboards. Remember, however,
that you should never try to specify a left brace or right brace by simply
typing `|{|' or `|}|'; the |{| and |}| symbols are reserved for grouping.
The right way is to type `|\{|' or `|\}|' or `|\lbrace|' or `|\rbrace|'.
In order to get a slightly larger version of any of these symbols, just
precede them by `↑{:bigl}' (for opening delimiters) or `↑{:bigr}' (for
closing ones). This makes it easier to read formulas that contain
delimiters inside delimiters:
\beginlongmathdemo
\it Input&\it Output\cr
\noalign{\vskip2pt}
|$\bigl(x-s(x)\bigr)\bigl(y-s(y)\bigr)$|&
\bigl(x-s(x)\bigr)\bigl(y-s(y)\bigr)\cr
|$\bigl[x-s[x]\bigr]\bigl[y-s[y]\bigr]$|&
\bigl[x-s[x]\bigr]\bigl[y-s[y]\bigr]\cr
|$\bigl|\vrt| |\vrt|x|\vrt|+|\vrt|y|\vrt| \bigr|\vrt|$|&
\bigl\vert\vert x\vert+\vert y\vert\bigr\vert\cr
|$\bigl\lfloor\sqrt A\bigr\rfloor$|&
\bigl\lfloor\sqrt A\bigr\rfloor\cr
\endmathdemo
The |\big| delimiters are just enough bigger than ordinary ones so that
the difference can be perceived, yet small enough to be used in the text
of a paragraph. Here are all@22 of them, in the ordinary size and in
the |\big| size:
$$\displayvbox{
\hbox{$(\,)\,[\,]\,\{\,\}\,\lfloor\,\rfloor\,\lceil\,\rceil\,\langle\,\rangle
\,/\,\backslash\,\vert\,\Vert\,\uparrow\,\Uparrow\,\downarrow\,\Downarrow
\,\updownarrow\,\Updownarrow$}
\smallskip
\hbox{$\bigl(\,\bigr)\,\bigl[\,\bigr]\,\bigl\{\,\bigr\}\,\bigl\lfloor
\,\bigr\rfloor\,\bigl\lceil\,\bigr\rceil\,\bigl\langle\,\bigr\rangle
\,\big/\,\big\backslash\,\big\vert\,\big\Vert\,\bigm\uparrow\,\bigm\Uparrow
\,\bigm\downarrow\,\bigm\Downarrow\,\bigm\updownarrow\,\bigm\Updownarrow$}
}$$
You can also type ↑{:Bigl} and ↑{:Bigr} to get larger symbols suitable for
displays:
$$\displaybox{$
\Bigl(\,\Bigr)\,\Bigl[\,\Bigr]\,\Bigl\{\,\Bigr\}\,\Bigl\lfloor
\,\Bigr\rfloor\,\Bigl\lceil\,\Bigr\rceil\,\Bigl\langle\,\Bigr\rangle
\,\Big/\,\Big\backslash\,\Big\vert\,\Big\Vert\,\Bigm\uparrow\,\Bigm\Uparrow
\,\Bigm\downarrow\,\Bigm\Downarrow\,\Bigm\updownarrow\,\Bigm\Updownarrow$}
$$
These are 50\% taller than their |\big| counterparts. Displayed formulas
most often use delimiters that are even taller (twice the size of\/ |\big|);
such delimiters are constructed by ↑{:biggl} and ↑{:biggr}, and they
look like this:
$$\displaybox{$
\biggl(\,\biggr)\,\biggl[\,\biggr]\,\biggl\{\,\biggr\}\,\biggl\lfloor
\,\biggr\rfloor\,\biggl\lceil\,\biggr\rceil\,\biggl\langle\,\biggr\rangle
\,\bigg/\,\bigg\backslash\,\bigg\vert\,\bigg\Vert\,\biggm\uparrow
\,\biggm\Uparrow\,\biggm\downarrow\,\biggm\Downarrow\,\biggm\updownarrow
\,\biggm\Updownarrow$}
$$
Finally, there are ↑{:Biggl} and ↑{:Biggr} versions, 2.5 times as tall
as the |\bigl| and |\bigr| delimiters:
$$\displaybox{$
\Biggl(\,\Biggr)\,\Biggl[\,\Biggr]\,\Biggl\{\,\Biggr\}\,\Biggl\lfloor
\,\Biggr\rfloor\,\Biggl\lceil\,\Biggr\rceil\,\Biggl\langle\,\Biggr\rangle
\,\Bigg/\,\Bigg\backslash\,\Bigg\vert\,\Bigg\Vert\,\Biggm\uparrow
\,\Biggm\Uparrow\,\Biggm\downarrow\,\Biggm\Downarrow\,\Biggm\updownarrow
\,\Biggm\Updownarrow$}
$$
\medskip
\exercise Guess how to type the formula $\displaystyle
\biggl({\partial↑2\over\partial x↑2}+{\partial↑2\over\partial y↑2}
\biggr)\bigl\vert\varphi(x+iy)\bigr\vert↑2=0$, in display style,
using |\bigg| delimiters for the large parentheses. \ (The symbols $\partial$
and $\varphi$ that appear here are called ↑{:partial} and ↑{:varphi}.)
\answer |$$\displaystyle\biggl({\partial↑2\over\partial x↑2}+|\hfil\break
|{\partial↑2\over\partial y↑2}\biggr)\bigl|\vrt|\varphi(x+iy)\bigr|\vrt|↑2=0$$|.
\dangerexercise In practice, |\big| and |\bigg| delimiters are used much
more often than |\Big| and |\Bigg| ones. Why do you think this is true?
\answer Formulas that are more than one line tall are usually two lines tall,
not 1$1\over2$ or 2$1\over2$ lines tall.
\danger A |\bigl| or |\Bigl| or |\biggl| or |\Biggl| delimiter is an
↑{opening}, like a left parenthesis;
a |\bigr| or |\Bigr| or |\biggr| or |\Biggr| delimiter is a
↑{closing}, like a right parenthesis. Plain \TeX\ also provides
↑{:bigm} and ↑{:Bigm} and ↑{:biggm} and ↑{:Biggm} delimiters, for use
in the middle of formulas; such a delimiter plays the r\↑ole of a ↑{relation},
like an equals sign, so \TeX\ puts a bit of space on either side of it.
\beginlongmathdemo
|$\bigl(x\in A(n)\bigm|\vrt|x\in B(n)\bigr)$|&
\tenmath\bigl(x\in A(n)\bigm\vert x\in B(n)\bigr)\cr
\noalign{\vskip2pt}
|$\union_n X_n\bigm\|\vrt|\inter_n Y_n$|&
\tenmath\union_n X_n\bigm\Vert\inter_n Y_n\cr
\endmathdemo
↑(:union)↑(:inter)↑(:verticalline)↑(:in)
You can also say just ↑{:big} or ↑{:Big} or ↑{:bigg} or ↑{:Bigg}; this produces
a delimiter that acts as an ordinary variable. It is used primarily with
slashes and backslashes, as in the following example.
\beginlongmathdemo
\noalign{\vskip-2pt}
|$${a+1\over b}\bigg/{c+1\over d}$$|&
\tenmath\displaystyle{a+1\over b}\bigg/{c+1\over d}\cr
\endmathdemo
\dangerexercise What's the professional way to type
$\tenmath\bigl(x+f(x)\bigr)\big/\bigl(x-f(x)\bigr)$? \ (Look closely.)
\answer |$\bigl(x+f(x)\bigr) \big/ \bigl(x-f(x)\bigr)$|. \ (Notice especially
the `|\big/|'; an ordinary ↑{slash} would look too small between the
|\big| parentheses.)
\TeX\ has a built-in mechanism that figures out how tall a pair of delimiters
needs to be, in order to enclose a given subformula; so you can use this
method, instead of deciding whether a delimiter should be |\big| or
|\bigg| or whatever. All you do is say
$$\displaybox{|\left|\<delim$_1$>\<subformula>|\right|\<delim$_2$>}$$
↑(*left)↑(*right)
and \TeX\ will typeset the subformula, putting the specified delimiters at
the left and the right. The size of the delimiters will be just big enough
to almost cover the subformula. For example, in the display
\beginlongdisplaymathdemo
|$$1+\left(1\over1-x↑2\right)↑3$$|&1+\left(1\over1-x↑2\right)↑3\cr
\endmathdemo
\TeX\ has chosen |\biggl(| and |\biggr)|, because smaller delimiters
would be too small for this particular fraction. A simple formula like
`|\left(x\right)|' yields just `$\left(x\right)$'; thus, |\left| and
|\right| sometimes choose delimiters that are smaller than |\bigl| and |\bigr|.
Whenever you use |\left| and |\right| they must pair up with each other,
like braces do in groups. You can't have |\left| in one formula
and |\right| in another, nor are you allowed to type things like
`|\left(...{...\right)...}|' or
`|\left(...\begingroup...\right)...\endgroup|'.
This restriction makes sense, because \TeX\ needs to typeset the
subformula that appears between |\left| and |\right| before it can decide
how big to make the delimiters. But it is worth explicit mention here,
because you do {\sl not\/} have to match parentheses and brackets, etc.,
when you are not using |\left| and |\right|: \TeX\ will not complain if
you input a formula like `|$[0,1)$|' or even `|$)($|' or just `|$)$|'.\
(And it's a good thing \TeX\ doesn't, for such unbalanced formulas occur
surprisingly often in mathematics papers.) \ Even when you do use |\left|
and |\right|, \TeX\ doesn't look closely at the particular delimiters that
you happen to choose; thus, you can type strange things like `|\left)|'
and/or `|\right(|' if you know what you're doing. Or even if you don't.
The |\over| operation in the example displayed above does not involve the
`|1+|' at the beginning of the formula; this happens because |\left| and
|\right| have the function of ↑{grouping}, in addition to their function
of delimiter-making. Any definitions that you happen to make between
|\left| and |\right| will be local, as if braces had appeared around the
enclosed subformula.
\exercise Use |\left| and |\right| to typeset the following display
(with ↑{:phi} for $\phi$):
$$\pi(n)=\sum_{k=2}↑n\left\lfloor\phi(k)\over k-1\right\rfloor.$$
\answer |$$\pi(n)=\sum_{k=2}↑n\left\lfloor\phi(k)\over k-1\right\rfloor.$$|
At this point you are probably wondering why you should bother learning about
|\bigl| and |\bigr| and their relatives, when |\left| and |\right| are there
to calculate sizes for you automatically. Well, it's true that |\left|
and |\right| are quite handy, but there are at least three situations in which
you will want to use your own wisdom when selecting the proper delimiter size:
\ (1)@Sometimes |\left| and |\right| choose a smaller delimiter than you want.
For example, we used |\bigl| and |\bigr| to produce $\bigl\vert\vert x\vert+
\vert y\vert\bigr\vert$ in one of the previous illustrations; |\left| and
|\right| don't make things any bigger than necessary, so
`|\left|\vrt|\left|\vrt|x\right|\vrt|+\left|\vrt|y\right|\vrt|\right|\vrt'
yields only
`$\left\vert \left\vert x\right\vert +\left\vert y\right\vert \right\vert$'.
\ (2)@Sometimes |\left| and |\right| choose a larger delimiter than you want.
This happens most frequently when they enclose a large operator in a display;
for example, compare the following two formulas:
\beginlongdisplaymathdemo
\noalign{\vskip 6pt}
|$$\left( \sum_{k=1}↑n A_k \right)$$|&\left( \sum_{k=1}↑n A_k \right)\cr
\noalign{\vskip 3pt}
|$$\biggl( \sum_{k=1}↑n A_k \biggr)$$|&\biggl( \sum_{k=1}↑n A_k \biggr)\cr
\endmathdemo
The rules of\/ |\left| and |\right| cause them to enclose the ↑{:sum} together
with its ↑{limits}, but in special cases like this it looks better to let
the limits hang out a@bit; |\bigg| delimiters are better here.
\ (3)@Sometimes you need to break a huge displayed
formula into two or more separate lines, and you want to make sure that
its opening and closing delimiters have the same size; but you can't use
|\left| on the first line and |\right| on the last, since |\left| and
|\right| must occur in pairs. The solution is to use |\Biggl| (say) on
the first line and |\Biggr| on the last.
\danger Of course, one of the advantages of\/ |\left| and |\right| is that
they can make arbitrarily large delimiters---much bigger than |\biggggg|!
The slashes and angle brackets do have a maximum size, however; if you
ask for really big versions of those symbols you will get the largest
ones available.
\exercise Prove that you have mastered delimiters: Coerce \TeX\ into
producing the formula
$$\pi(n)=\sum_{m=2}↑n\left\lfloor\biggl(\sum_{k=1}↑{m-1}\bigl\lfloor
(m/k)/\lceil m/k\rceil\bigr\rfloor\biggr)↑{-1}\right\rfloor.$$
\answer |$$\pi(n)=\sum_{m=2}↑n\left\lfloor\biggl(\sum_{k=1}↑{m-1}\bigl|
\hfil\break
|\lfloor(m/k)/\lceil m/k\rceil\bigr\rfloor\biggr)↑{-1}\right\rfloor.$$|
\danger If you type `|.|' after |\left| or |\right|, instead of
specifying one of the basic delimiters, you get a so-called ↑{null
delimiter} (which is blank). Why on earth would anybody want that, you may
ask. Well, you sometimes need to produce formulas that contain only one
large delimiter. For example, the display
$$\vert x\vert=\cases{x,&if $x\ge0$\cr
-x,&if $x<0$.\cr}$$
has a `$\{$' but no `$\}$'. It can be produced by a construction of the form
$$\displaybox{|$$|\vrt|x|\vrt|=\left\{ ... \right.$$|}$$
Chapter 18 explains how to fill in the `\hbox{|...|}' to finish this
construction; let's just notice for now that the `|\right.|' makes it
possible to have an invisible right delimiter to go with the visible
left brace.
\ddanger A null delimiter isn't completely void; it is an empty box
whose width is a \TeX\ parameter called ↑{*nulldelimiterspace}.
We will see later that null delimiters are inserted next to fractions.
Plain \TeX\ sets |\nulldelimiterspace=1.2pt|.
You can type `|<|' or `|>|' as convenient abbreviations for ↑{:langle} and
↑{:rangle}, when \TeX\ is looking for a delimiter. For example,
`|\bigl<|' is equivalent to `|\bigl\langle|', and `|\right>|' is
equivalent to `|\right\rangle|'. Of course `|<|' and `|>|' ordinarily
produce the ↑{less-than} and ↑{greater-than} relations (${<}\,{>}$), which
are quite different from ↑{angle brackets} ($\langle\,\rangle$).
\danger Plain \TeX\ also makes available a few more delimiters, which were
not listed in the basic set of@22 because they are sort of special.
The control sequences ↑{:arrowvert}, ↑{:Arrowvert}, and ↑{:bracevert} produce
delimiters made from the repeatable parts of the vertical arrows, double
vertical arrows, and large braces, respectively, without the arrowheads
or the curly parts of the braces. They produce results similar to
↑{:vert} or ↑{:Vert}, but they are surrounded by more white space and
they have a different weight. You can also use ↑{:lgroup} and ↑{:rgroup},
which are constructed from braces without the middle parts; and
↑{:lmoustache} and ↑{:rmoustache}, ↑(moustaches)
which give you the top and bottom halves of large braces. For example,
here are the |\Big| and |\bigg| versions of\/ |\vert|, |\Vert|, and
these seven special delimiters:
$$\halign{\indent$#\hfil$\cr
\ldots\Big\vert\ldots\Big\Vert
\ldots\Big\arrowvert\ldots\Big\Arrowvert\ldots\Big\bracevert
\ldots\Big\lgroup\ldots\Big\rgroup\ldots\Big\lmoustache\ldots\Big\rmoustache
\ldots\,;\cr
\noalign{\smallskip}
\ldots\bigg\vert\ldots\bigg\Vert
\ldots\bigg\arrowvert\ldots\bigg\Arrowvert\ldots\bigg\bracevert
\ldots\bigg\lgroup\ldots\bigg\rgroup\ldots\bigg\lmoustache\ldots\bigg\rmoustache
\ldots\,.\cr}$$
Notice that |\lgroup| and |\rgroup| are rather like bold parentheses, with
sharper bends at the corners; this makes them attractive for certain large
displays. But you cannot use them exactly like parentheses, because
they are available only in large sizes (|\Big|@or@more).
\ddanger Question: What happens if a ↑{subscript} or ↑{superscript}
follows a large delimiter? Answer:@That's a good question. After a |\left|
delimiter, it is the first subscript or superscript of the enclosed
subformula, so it is effectively preceded by |{}|. After a |\right|
delimiter, it is a subscript or superscript of the entire |\left...\right|
subformula. And after a |\bigl| or |\bigr| or |\bigm| or |\big| delimiter,
it applies only to that particular delimiter. Thus, `|\bigl(_2|' works
quite differently from `|\left(_2|'.
\danger If you look closely at the examples of math typesetting in this
chapter, you will notice that large parentheses and brackets are
symmetric with respect to an invisible horizontal line that runs a little
bit above the ↑{baseline}; when a delimiter gets larger, its height and
depth both grow by the same amount. This horizontal line is called the
{\sl↑{axis}\/} of the formula; for example, a formula in the text of the
present paragraph would have an axis at this level: $\hskip 2em\over$. The
bar line in every fraction is centered on the axis, regardless of the size
of the numerator or denominator.
\danger Sometimes it is necessary to create a special box that should be
centered vertically with respect to the axis. \ (For example, the
`$\vert x\vert=\bigl\{\,\ldots$' example above was done with such a box.) \
\TeX\ provides a simple way to do this: you just say
$$\displaybox{|\vcenter{|\<vertical mode material>|}|}$$
and the vertical mode material will be packed into a box just as if
↑{*vcenter} had been ↑{*vbox}. This box will be raised or lowered until
its top edge is as far above the axis as the bottom edge is below.
\ddanger The concept of ``axis'' is meaningful for \TeX\ only in math
formulas, not in ordinary text; therefore \TeX\ allows you to use
|\vcenter| only in math mode. If you really need to center something
vertically in horizontal mode, the solution is to say `|$\vcenter{...}$|'.
\ (Incidentally, the constructions `|\vcenter| |to|\<dimen>'
and `|\vcenter| |expand|\<dimen>' are legal too, in math mode;
vertical glue is always set by the rules for |\vbox| in
Chapter@12. But |\vcenter| by itself is usually sufficient.)
\danger Any box can be put into a formula by simply saying ↑{*hbox} or
|\vbox| or ↑{*vtop} or ↑{*box} or ↑{*copy} in the normal way, even when
you are in math mode. Furthermore you can use ↑{*raise} or ↑{*lower}, as
if you were in horizontal mode, and you can insert vertical rules with
↑{*vrule}. Such constructions, like |\vcenter|, produce boxes that can be
used like ordinary symbols in math formulas.
\ddanger Sometimes you need to make up your own symbols, when you run across
something unusual that doesn't occur in the fonts. If the new symbol
occurs only in one place, you can use |\hbox| or |\vcenter| or something
to insert exactly what you want; but if you are defining a macro for
general use, you may want to use different constructions in different
styles. \TeX\ has a special feature called ↑{*mathchoice} that comes
to the rescue in such situations: you write
$$\displaybox{|\mathchoice{|\<math>|}{|\<math>|}{|\<math>|}{|\<math>|}|}$$
where each \<math> specifies a subformula. \TeX\ will choose the first
subformula in style $D$ or@$D'$, the second in style $T$ or@$T'$, the
third in style $S$ or@$S'$, the fourth in style $\SS$ or $\SS'$.
\ (\TeX\ actually typesets all four subformulas, before it chooses the
final one, because the actual style is not always known at the time a
|\mathchoice| is encountered; for example, when you type `|\over|' you often
change the style of everything that has occurred earlier in the formula.
Therefore |\mathchoice| is somewhat expensive in terms of time and space,
and you should use it only when you're willing to pay the price.)
\ddangerexercise Guess what output is produced by the following commands:
\begintt
\def\puzzle{{\mathchoice{D}{T}{S}{SS}}}
$$\puzzle{\puzzle\over\puzzle↑{\puzzle↑\puzzle}}$$
\endtt
\answer A displayed formula equivalent to |$${D}{{T}\over{T}↑{{S}↑{SS}}}$$|.
\ddangerexercise Devise a `↑{:square}' macro that produces a
\def\sqr#1#2{{\vcenter{\hrule height.#2pt
\hbox{\vrule width.#2pt height#1pt \hskip#1pt \vrule}
\hrule height.#2pt}}}%
`$\,\sqr34\,$' for use in math formulas. The box should be symmetrical
with respect to the axis, and its inside dimensions should be $3\pt$ in
display and text styles, $2.1\pt$ in script styles, and $1.5\pt$ in
scriptscript styles. The rules should be $0.4\pt$ thick in
display and text styles, $0.3\pt$ thick otherwise.
\answer{\obeylines|\def\sqr#1#2{{\vcenter{\hrule height.#2pt|
| \hbox{\vrule width.#2pt height#1pt \hskip#1pt \vrule}|
| \hrule height.#2pt}}}|
|\def\square{\mathchoice\sqr34\sqr34\sqr{2.1}3\sqr{1.5}3}|}
\ddanger Plain \TeX\ has a macro called ↑{:mathpalette} that is useful
for |\mathchoice| constructions; `|\mathpalette\a{xyz}|' expands to
the four-pronged array of choices
`|\mathchoice|\stretch|{\a|\stretch|\displaystyle|\stretch|{xyz}}|\stretch
|...|\stretch|{\a|\stretch|\scriptscriptstyle|\stretch|{xyz}}|\stretch'.
Thus the first argument to |\mathpalette| is a control sequence whose
first argument is a style selection. Appendix@B contains several examples
that show how |\mathpalette| can be applied. \ (See in particular the
definitions of\/ |\phantom|, |\root|, and |\smash|; the ↑{congruence sign}
↑{:cong} ($\cong$) is also constructed from $=$ and $~$ using |\mathpalette|.)
↑(constructing new math symbols)
↑(math symbols, construction of)
\ddanger At the beginning of this chapter we discussed the commands
|\over|, |\atop|, |\choose|, and |\above|. These are special cases of
\TeX's ``↑{generalized fraction}'' feature, which includes also the
three primitives
$$\halign{\indent#\hfil\cr
|\overwithdelims|\<delim$_1$>\<delim$_2$>\cr
|\atopwithdelims|\<delim$_1$>\<delim$_2$>\cr
|\abovewithdelims|\<delim$_1$>\<delim$_2$>\<dimen>\cr}$$
The third of these is the most general, as it encompasses all of the other
generalized fractions: ↑{*overwithdelims} uses a ↑{fraction} bar whose
thickness is the default for the current size, and ↑{*atopwithdelims} uses
an invisible fraction bar whose thickness is zero, while
↑{*abovewithdelims} uses a bar whose thickness is specified explicitly.
\TeX\ places the immediately preceding subformula (the ↑{numerator}) over
the immediately following subformula (the ↑{denominator}), separated by a
bar line of the desired thickness; then it puts \<delim$_1$> at the left
and \<delim$_2$> at the right. For example, `↑{:choose}' is equivalent to
`|\atopwithdelims()|'. If you define |\legendre| to be
`|\overwithdelims()|', you can typeset the ↑{Legendre symbol}
\def\legendre{\overwithdelims()}%
`$a\legendre b$' by saying `|{a\legendre b}|'. The size of the surrounding
delimiters depends only on the style, not on the size of the fractions;
larger delimiters are used in styles $D$ and@$D'$ (see Appendix@G\null). The
simple commands ↑{*over}, ↑{*atop}, and ↑{*above} are equivalent to the
corresponding `|withdelims|' commands when the delimiters are null; for
example, `|\over|' is an abbreviation for `|\overwithdelims..|'.
\def\euler{\atopwithdelims<>}
\ddangerexercise Define a control sequence |\euler| so that the
↑{Eulerian number} $n\euler k$ will be produced when you type `|{n\euler k}|'
in a formula.
\answer|\def\euler{\atopwithdelims<>}|.
\ddanger Appendix G explains exactly how \TeX\ computes the desired size
of delimiters for |\left| and@|\right|. The general idea is that delimiters
are vertically centered with respect to the ↑{axis}; hence, if we want
to cover a subformula between |\left| and |\right| that extends $y_1$@units
above the axis and $y_2$@units below, we need to make a delimiter whose
height plus depth is at least $y$@units, where $y=2\max(y_1,y_2)$.
It is usually best not to cover the formula completely, however,
but just to come close; so \TeX\ allows you to specify
two parameters, the ↑{*delimiterfactor}@$f$ (an@integer) and the
↑{*delimitershortfall}@$\delta$ (a@dimension). The minimum delimiter size
is taken to be at least $y\cdot f/1000$, and at least $y-\delta$. Appendix@B
sets $f=901$ and $\delta=5\pt$. Thus, if $y=30\pt$, the plain \TeX\ format
causes the delimiter to be more than $27\pt$ tall; if $y=100\pt$, the
corresponding delimiter will be at least $95\pt$ tall.
\danger So far we have been discussing the rules for typing math formulas,
but we haven't said much about how \TeX\ actually goes about converting
its input into lists of boxes and glue. Almost all of the control
sequences that have been mentioned in Chapters 16 and@17 are ``high level''
features of the plain \TeX\ format; they are not built into \TeX\ itself.
Appendix@B defines those control sequences in terms of more primitive
commands that \TeX\ actually deals with. For example, `|\choose|' is
an abbreviation for `|\atopwithdelims()|'; Appendix@B not only introduces
|\choose|, it also tells \TeX\ where to find the delimiters |(| and@|)|
in various sizes. The plain \TeX\ format defines all of the special
characters like |\alpha| and@|\mapsto|, all of the special accents like
|\tilde| and@|\widehat|, all of the large operators like |\sum| and@|\int|,
and all of the delimiters like |\lfloor| and@|\vert|. Any of these things
can be redefined, in order to adapt \TeX\ to other mathematical styles
and/or to other fonts.
\danger The remainder of this chapter discusses the low-level commands
that \TeX\ actually obeys behind the scenes. Every paragraph on the next
few pages is marked with double dangerous bends, so you should skip to
Chapter@18 unless you are a glutton for \TeX nicalities.
\ninepoint
\ddanger All characters that are typeset in math mode belong to one of
sixteen {\sl↑{families} of fonts}, numbered internally from 0 to@15. Each
of these families consists of three fonts: one for text size, one for
script size, and one for scriptscript size. The commands ↑{*textfont},
↑{*scriptfont}, and ↑{*scriptscriptfont} are used to specify the members
of each family. For example, ↑{family@0} in the plain \TeX\ format is
used for roman letters, and Appendix@B contains the instructions
\begintt
\textfont0=\tenrm
\scriptfont0=\sevenrm
\scriptscriptfont0=\fiverm
\endtt
to set up this family: The 10-point roman font (↑{:tenrm}) is used for
normal symbols, 7-point roman (↑{:sevenrm}) is used for subscripts, and
5-point roman (↑{:fiverm}) is used for sub-subscripts. Since there are up to
256@characters per font, and 3@fonts per family, and 16@families, \TeX\ can
access up to 12,288 characters in any one formula (4096@in each of
the three sizes). Imagine that.
\ddanger A definition like |\textfont|\<family number>|=|\<font identifier>
is local to the group that contains it, so you can easily change family
membership from one set of conventions to another and back again. Furthermore
you can put any font into any family; for example, the command
\begintt
\scriptscriptfont0=\the\scriptfont0
\endtt
↑(*the)
makes sub-subscripts in family@0 the same size as the subscripts currently
are. \TeX\ doesn't check to see if the families are sensibly organized; it
just follows instructions. \ (However, fonts cannot be used in families
2 and@3 unless they contain a certain number of special parameters, as we
shall see later.) \ Incidentally, \TeX\ uses ↑{*nullfont}, which contains
no characters, for each family member that has not been defined.
\ddanger During the time that a math formula is being read,
\TeX\ remembers each symbol as being ``character position so-and-so in
family number such-and-such,'' but it does not take note of what fonts
are actually in the families until reaching the end of the formula.
Thus, if you have loaded a font called |\oldstyle| that contains ↑{old-style
numerals}, and if you say something like
\begintt
$\textfont0=\tenrm 9 \textfont0=\oldstyle 9$
\endtt
you will get two 9's in font |\oldstyle|, assuming that \TeX\ has been
set up to take 9's from family@0. The reason is that |\textfont0| is@|\oldstyle|
at the end of the formula, and that's when it counts. On the other hand,
if you say
\begintt
$\textfont0=\tenrm 9 \hbox{$9\textfont0=\oldstyle$}$
\endtt
the first 9 will be from |\tenrm| and the second from |\oldstyle|, because
the formula in the hbox will be typeset before it is incorporated into
the formula containing this box.
\ddangerexercise If you say `|${\textfont0=\oldstyle 9}$|', what
font will be used for the@9?
\answer The |\textfont0| that was current at the beginning of the formula
will be used, because this redefinition is local to the braces. \
(It would be a different story if `↑{*global}|\textfont|' had appeared instead;
that would have changed the meaning of \hbox{|\textfont0|} at all levels.)
\ddanger Every ↑{math character} is given an identifying code
number between 0 and@4095, obtained by adding 256@times the family number
to the position number. This is easily expressed in ↑{hexadecimal
notation}, using one hexadecimal digit for the family and two for the
character; for example, \hex{24A} stands for character@\hex{4A} in
family@2. Each character is also assigned to one of eight classes,
numbered 0 to@7, as follows:
$$\halign{\indent#\hfil&\quad#\hfil&\quad#\hfil&
\hskip4em#\hfil&\quad#\hfil&\quad#\hfil\cr
\it \kern-2pt Class&\it Meaning&\kern-2pt\it Example&
\it \kern-2pt Class&\it Meaning&\kern-2pt\it Example\cr
\noalign{\vskip2pt}
0&Ordinary&|/|&
4&Opening&|(|\cr
1&Large operator&|\sum|&
5&Closing&|)|\cr
2&Binary operation&|+|&
6&Punctuation&|,|\cr
3&Relation&|=|&
7&Variable family&|x|\cr
}$$
↑(large operator)↑(binary operation)↑(relation)↑(opening)↑(closing)
↑(punctuation)↑(variable family)
Classes 0 to 6 tell what ``part of speech'' the character belongs to, in
math-printing language; class@7 is a special case discussed below. The class
number is multiplied by 4096 and added to the character number, and this
is the same as making it the leading digit of a four-digit hexadecimal
number. For example, Appendix@B defines |\sum| to be the math character
\hex{1350}, meaning that it is a large operator (class@1) found in position
\hex{50} of family@3.
\ddangerexercise The ↑{:oplus} and ↑{:bullet} symbols ($\oplus$ and $\bullet$)
are binary operations that appear in positions 8 and@15 (decimal)
of family@2, when the fonts of plain@\TeX\ are being used. Guess
what their math character codes are. \ (This is too easy.)
\answer |"2208| and |"220F|.
\ddanger Class 7 is a special case that allows math symbols to change families.
It behaves exactly like class@0, except that the specified family is
replaced by the current value of an integer parameter called ↑{*fam},
provided that |\fam| is a legal family number (i.e., if it lies between
0 and@15). \TeX\ automatically sets |\fam=-1| whenever math mode is entered;
therefore class@7 and class@0 are equivalent unless |\fam| has been
given a new value. Plain \TeX\ changes |\fam| to@0 when the user
types `↑{:rm}'; this makes it convenient to get roman letters in formulas,
as we will see in Chapter@18, since letters belong to class@7. \ (The
control sequence |\rm| is an abbreviation for `|\fam=0\tenrm|'; thus,
|\rm| causes
|\fam| to become zero, and it makes |\tenrm| the ``↑{current font}.''
In horizontal mode, the |\fam| value is irrelevant and the current font
governs the typesetting of letters; but in math mode, the current font is
irrelevant and the |\fam| value governs the letters. The current font
affects math mode only if dimensions are given in ↑{.ex} or ↑{.em} units;
it also has an effect if an |\hbox| appears inside a formula, since
the contents of an hbox are typeset in horizontal mode.)
\ddanger The interpretation of characters in math mode is defined by a
table of@128 ``mathcode'' values; these table entries can be changed
by the ↑{*mathcode} command, just as the category codes are changed
by ↑{*catcode} (see Chapter@7). Each mathcode specifies class, family, and
character position, as described above. For example, Appendix@B contains
the commands
\begintt
\mathcode`~="3218
\mathcode`*="2203
\endtt
which cause \TeX\ to treat the character `|~|' in math mode as a relation
↑(tilde) (class@3) found in position \hex{18} of family@2, and to treat an
↑{asterisk} `|*|' as a binary operation found in position@3 of that same family.
The initial value of\/ |\mathcode`b| is \hex{7162}; thus, |b|@is character
\hex{62} in ↑{family@1} (italics), and its family will vary with |\fam|.
\TeX\ looks at the mathcode only when it is typesetting a character whose
catcode is 11@(letter) or 12@(other), or when it encounters a character that
is given explicitly as ↑{*char}\<number>. \ (If\/ |\char| is used with a
character code between 128 and@255, there is no |\mathcode| value; ↑{family@0}
and class@0 are implied.)
\ddanger A |\mathcode| can also have the special value \hex{8000}, which
causes the character to behave as if it has catcode@13 (active). Appendix@B
uses this feature to make |'| ↑(apostrophe) expand to |↑|↑{:prime} and
|"| ↑(doublequote) expand to |↑{\prime\prime}|. Such mathcodes don't
interfere with the use of |'| and |"| in ↑{octal} or ↑{hexadecimal} numbers.
\ddanger The mathcode table allows you to refer indirectly to any character in
any family, with the touch of a single key. You can also specify a math
character code directly, by typing ↑{*mathchar}, which is analogous to
↑{*char}. For example, the command `|\mathchar"1ABC|' specifies a
character of class@1, family@10 (\hex A), and position \hex{BC}. A@hundred
or so definitions like
\begintt
\def\sum{\mathchar"1350 }
\endtt
would therefore suffice to define the special symbols of plain \TeX. But
there is a better way: \TeX\ has a primitive command ↑{*mathchardef},
which relates to |\mathchar| just as ↑{*chardef} does to |\char|.
Appendix@B has a hundred or so definitions like
\begintt
\mathchardef\sum="1350
\endtt
to define the special symbols. A |\mathchar| must be between 0 and 32767
(\hex{7FFF}).
\ddanger A character of class@1, i.e., a ↑{large operator} like |\sum|, will
be vertically centered with respect to the axis when it is typeset. Thus,
the large operators can be used with different sizes of type. This vertical
adjustment is not made for symbols of the other classes. ↑(raising characters)
↑(lowering characters)
\ddanger \TeX\ associates classes with subformulas as well as with individual
characters. Thus, for example, you can treat a complex construction as if
it were a binary operation or a relation, etc., if you want to. The
commands ↑{*mathord}, ↑{*mathop}, ↑{*mathbin}, ↑{*mathrel}, ↑{*mathopen},
↑{*mathclose}, and ↑{*mathpunct} are used for this purpose; each of them
is followed either by a single character or by a subformula in braces.
For example, |\mathopen\mathchar"1234| is equivalent to |\mathchar"4234|,
because |\mathopen| forces class@4 (opening). In the formula
`|$G\mathbin:H$|', the ↑{colon} is@treated as a binary operation.
And Appendix@B constructs large opening symbols by
defining ↑{:bigl}|#1| to be an abbreviation for
\begintt
\mathopen{\hbox{$\left#1 ...\right.$}}
\endtt
There's also an eighth classification, ↑{*mathinner}, which is not
normally used for individual symbols; fractions and ↑{*left}|...|↑{*right}
constructions are treated as ``inner'' subformulas, which means that
they will be surrounded by additional space in certain circumstances.
All other subformulas are generally treated as ordinary symbols,
whether they are formed by |\overline| or |\hbox| or |\vcenter| or
by simply being enclosed in braces. Thus, |\mathord| isn't really
a necessary part of the \TeX\ language; instead of typing
`|$1\mathord,234$|' you can get the same effect from `|$1{,}234$|'.
\ddangerexercise Commands like |\mathchardef\alpha="10B| are used in
Appendix@B to define the lower-case ↑{Greek} letters. Suppose that you want
to extend plain \TeX\ by putting ↑{boldface math italic} letters
in family@9, analogous to the normal math italic letters in family@1.
\ (Such fonts aren't available in stripped down versions of \TeX, but
let's assume that they exist.) \ Assume that the control sequence
|\bmit| has been defined as an abbreviation for `|\fam=9|'; hence
`|{\bmit b}|' will give a boldface math italic@|b|. What change to the
definition of\/ |\alpha| will make |{\bmit\alpha}| produce a boldface@alpha?
\checkequals\bmiexno{\count\exno}
\answer |\mathchardef\alpha="710B|. Incidentally, |{\rm\alpha}| will
then give a spurious result, because character position \hex{0B} of
roman fonts does not contain an alpha; you should warn
your users about what characters they are allowed to type under the
influence of special conventions like ↑{:rm}.
\ddanger ↑{Delimiters} are specified in a similar but more complicated
way. Each character has not only a@|\catcode| and a@|\mathcode| but also
a@↑{*delcode}, which is either negative (for characters that should not
act as delimiters) or less than \hex{1000000}. In other words,
nonnegative delcodes consist of six hexadecimal digits. The first three
digits specify a ``small'' variant of the delimiter, and the last three
specify a ``large'' variant. For example, the command
\begintt
\delcode`x="123456
\endtt
means that if the letter |x| is used as a delimiter, its small variant
is found in position \hex{23} of family@1, and its large variant is found
in position \hex{56} of family@4. If the small or large variant is
given as |000|, however (position@0 of ↑{family@0}), that variant is ignored.
\TeX\ looks at the delcode when a character follows ↑{*left} or ↑{*right},
or when a character follows one of the ↑{.withdelims} commands; a
negative delcode leads to an error message, but otherwise \TeX\ finds
a suitable delimiter by first trying the small variant and then
the large. \ (Appendix@G discusses this process in more detail.) \
For example, Appendix@B contains the commands
\begintt
\delcode`(="028300 \delcode`.=0
\endtt
which specify that the small variant of a left parenthesis is found in
position \hex{28} of family@0, and that the large variant is in position@0
of family@3; also, a period has no variants, hence `|\left.|' will produce
a ↑{null delimiter}. There actually are several different left parenthesis
symbols in family@3; the smallest is in position@0, and the others are
linked together by information that comes with the font. All delcodes
are@$-1$ until they are changed by a |\delcode| command.
\ddangerexercise Appendix@B defines |\delcode`<| so that there is a
shorthand notation for ↑{angle brackets}. Why do you think Appendix@B
doesn't go further and define |\delcode`{|?
\answer If\/ |\delcode`{| were set to some nonnegative delimiter code, you
would get no error message when you wrote something like `|\left{|'.
This would be bad because strange effects would happen when certain
subformulas were given as arguments to macros, or when they appeared
in alignments. But it has an even worse defect, because a user who
gets away with `|\left{|' is likely to try also `|\bigl{|', which
fails miserably.
\ddanger A delimiter can also be given directly, as `↑{*delimiter}\<number>'.
In this case the number can be as high as \hex{7FFFFFF}, i.e., seven
hexadecimal digits; the leading digit specifies a class, from 0 to@7,
as in a |\mathchar|. For example, Appendix@B contains the definition
\begintt
\def\langle{\delimiter"426830A }
\endtt
and this means that ↑{:langle} is an opening (class 4) whose small
variant is \hex{268} and whose large variant is \hex{30A}. When |\delimiter|
appears after |\left| or |\right|, the class digit is ignored; but
when |\delimiter| occurs in other contexts, i.e., when \TeX\ isn't
looking for a delimiter, the three rightmost digits are dropped and
the remaining four digits act as a |\mathchar|. For example, the expression
`|$\langle x$|' is treated as if it were `|$\mathchar"4268 x$|'.
\ddangerexercise What goes wrong if you type
`|\bigl\delimiter"426830A|'\thinspace?
\answer Since |\bigl| is defined as a macro with one parameter,
it gets just `|\delimiter|' as the argument. You have to write
`|\bigl{\delimiter"426830A}|' to make this work. On the other hand,
|\left| will balk if the following character is a left brace. Therefore
it's best to have control sequence names for all delimiters.
\ddanger Granted that these numeric conventions for |\mathchar| and
|\delimiter| are not beautiful, they sure do pack a lot of information into
a small space. That's why \TeX\ uses them for low-level definitions inside
formats. Two other low-level primitives also deserve to be mentioned:
↑{*radical} and ↑{*mathaccent}. Plain \TeX\ makes ↑{square root signs}
and math accents available by giving the commands
\begintt
\def\sqrt{\radical"270370 }
\def\widehat{\mathaccent"362 }
\endtt
and several more like them. The idea is that |\radical| is followed by
a delimiter code and |\mathaccent| is followed by a math character code,
so that \TeX\ knows the family and character positions for the symbols
used in radical and accent constructions. Appendix@G gives precise
information about the positioning of these characters. By changing the
definitions, \TeX\ could easily be extended so that it would typeset a
variety of different radical signs and a variety of different accent
signs, if such symbols were available in the fonts.
↑(surd signs, see radical)
\ddanger Plain \TeX\ uses ↑{family@1} for math italic letters, ↑{family@2} for
ordinary math symbols, and ↑{family@3} for large symbols. \TeX\ insists that
↑(math fonts)
the fonts in families 2 and@3 have special ``texinfo'' parameters,
which govern mathematical spacing according to the rules in Appendix@G\null;
the ↑{.cmsy} and ↑{.cmathx} ↑{symbol fonts} have these parameters, so
their assignment to families 2 and@3 is almost mandatory. \ (There is, however,
a way to modify the texinfo of any font, using the ↑{*texinfo} command.) \
↑{.INITEX} initializes the mathcodes of all ↑{letters} |A| to@|Z| and |a| to@|z|
so that they are symbols of class@7 and family@1; that's why
it is natural to use family@1 for math italics. Similarly, the digits |0|
to@|9| are class@7 and family@0. None of the other families
is treated in any special way by \TeX. Thus, for example, plain \TeX\ puts
text italic in family@4, slanted roman in family@5, bold roman in family@6,
and typewriter type in family@7, but any of these numbers could be
switched around. There is a macro ↑{:newfam}, analogous to |\newbox|,
that will assign symbolic names to families that aren't already used.
\ddanger When \TeX\ is in horizontal mode, it is making a horizontal list;
in vertical mode, it is making a vertical list. Therefore it should come
as no great surprise that \TeX\ is making a ↑{math list} when it is in
↑{math mode}. The contents of horizontal lists were explained in Chapter@14,
and the contents of vertical lists were explained in Chapter@15; it's time
now to describe what math lists are made of. Each item in a math list
is one of the following types of things:\enddanger
\smallskip
\item\bull an ↑{atom} (to be explained momentarily);
\item\bull horizontal material (a rule or discretionary or penalty or
``whatsit'');
\item\bull a glob of ↑{glue} (from |\hskip| or |\mskip| or |\nonscript|);
\item\bull a ↑{kern} (from |\kern| or |\mkern|);
\item\bull a ↑{style change} (from |\displaystyle|, |\textstyle|, etc.);
\item\bull a ↑{generalized fraction} (from |\above|, |\over|, etc.);
\item\bull a ↑{boundary} (from |\left| or |\right|);
\item\bull a four-way ↑{choice} (from ↑{*mathchoice}).
\ddanger The most important items are called {\sl atoms}, and they have
three parts: a {\sl↑{nucleus}}, a {\sl↑{superscript}}, and a {\sl↑{subscript}}.
For example, if you type
\begintt
(x_i+y)↑{\overline{n+1}}
\endtt
in math mode, you get a math list consisting of five atoms:
$($, $x_i$, $+$, $y$, and@$)↑{\overline{n+1}}$. The nuclei of these atoms
are $($, $x$, $+$, $y$, and@$)$; the subscripts are empty except for the
second atom, which has subscript@$i$; the superscripts are empty except for the
last atom, whose superscript is@$\overline{n+1}$. This superscript is
itself a math list consisting of one atom, whose nucleus is@$n+1$; and that
nucleus is a math list consisting of three atoms.
\ddanger There are thirteen kinds of atoms, each of which might act
differently in a formula; for example, `$($' is an Open atom because
it comes from an opening. Here is a complete list of the different kinds:
$$\halign{\indent#\hfil&\enskip#\hfil\cr
Ord&is an ordinary atom like `$x$'\thinspace;\cr
Op&is a large operator atom like `$\sum$'\thinspace;\cr
Bin&is a binary operation atom like `$+$'\thinspace;\cr
Rel&is a relation atom like `$=$'\thinspace;\cr
Open&is an opening atom like `$($'\thinspace;\cr
Close&is a closing atom like `$)$'\thinspace;\cr
Punct&is a punctuation atom like `$,$'\thinspace;\cr
Inner&is an inner atom like `$1\over2$'\thinspace;\cr
Over&is an overline atom like `$\overline x$'\thinspace;\cr
Under&is an underline atom like `$\underline x$'\thinspace;\cr
Acc&is an accented atom like `$\hat x$'\thinspace;\cr
Rad&is a radical atom like `$\sqrt2$'\thinspace;\cr
Vcent&is a vbox to be centered, produced by |\vcenter|.\cr
}$$
\ddanger An atom's nucleus, superscript, and subscript are called its
{\sl ↑{fields}}, and there are four possibilities for each of these fields;
a field can be\enddanger
\smallskip
\item\bull empty;
\item\bull a math symbol (specified by family and position number);
\item\bull a box; or
\item\bull a math list.
\smallskip\noindent
For example, the Close atom $)↑{\overline{n+1}}$ considered above has an
empty subscript field; its nucleus is the symbol `$)$', which is
character@\hex{28} of family@0 if the conventions of plain \TeX\ are
in force; and its superscript field is the math list $\overline{n+1}$.
The latter math list consists of an Over atom whose nucleus
is the math list $n+1$; and that math list, in turn, consists of
three atoms of types Ord, Bin, Ord.
\ddanger You can see \TeX's view of a math list by typing ↑{*showlists}
in math mode. ↑(internal list format)
For example, after `|$(x_i+y)↑{\overline{n+1}}\showlists|' your log
file gets the following curious data:
$$\halign{\indent#\hfil\cr
|\mathopen|\cr
|.\fam0 (|\cr
|\mathord|\cr
|.\fam1 x|\cr
|[\fam1 i|\cr
\noalign{\penalty-500}
|\mathbin|\cr
|.\fam0 +|\cr
\noalign{\penalty-500}
|\mathord|\cr
|.\fam1 y|\cr
\noalign{\penalty-500}
|\mathclose|\cr
|.\fam0 )|\cr
|(\overline|\cr
|(.\mathord|\cr
|(..\fam1 n|\cr
|(.\mathbin|\cr
|(..\fam0 +|\cr
|(.\mathord|\cr
|(..\fam0 1|\cr
}$$
In our previous experiences with |\showlists| we observed that there can
be boxes within boxes, and that each line in the log file is
prefixed by dots to indicate its position in the hierarchy. Math lists
have a slightly more complex structure; therefore a dot is used to denote
the nucleus of an atom, a@`|(|' is used for the superscript field, and
a@`|[|' is used for the subscript field. Empty fields are not shown. Thus,
for example, the Ord atom@$x_i$ is represented here by three lines
`|\mathord|', `|.\fam1 x|', and `|[\fam1 i|'.
\ddanger Certain kinds of atoms carry additional information besides their
nucleus, subscript, and superscript fields: An Op atom will be marked
`↑{*limits}' or `↑{*nolimits}' if the normal ↑{*displaylimits}
convention has been overridden; a radical atom contains
a delimiter field to specify what radical sign is to be used; and an Acc atom
contains the family and character codes of the accent symbol.
\ddanger When you say ↑{*hbox}|{...}| in math mode, an Ord atom is placed
on the current math list, with the hbox as its nucleus. Similarly,
↑{*vcenter}|{...}| produces a Vcent atom whose nucleus is a box. But in
most cases the nucleus of an atom will be either a symbol or a math list.
You can experiment with |\showlists| to discover how other things like
fractions and mathchoices are represented internally.
\ddanger Chapter@26 contains complete details of how math lists are
constructed. As soon as math mode ends (i.e., when the closing `|$|'
occurs), \TeX\ dismantles the current math list and converts it into a
horizontal list. The rules for this conversion are spelled out in
Appendix@G\null. You can see ``before and after'' representations of such math
typesetting by ending a formula with `|\showlists$\showlists|'; the first
|\showlists| will display the math list, and the second will show the
(possibly complex) horizontal list that is manufactured from it.
\endchapter
The learning time is short. A few minutes gives the general flavor, and
typing a page or two of a paper generally uncovers most of the misconceptions.
\author ↑{KERNIGHAN} and ↑{CHERRY}, {\sl A System for %
Typesetting Mathematics\/} (1975)
% in {\sl Communications of the ACM\/} p152
\bigskip
Within a few hours (a few days at most)
a typist with no math or typesetting experience
can be taught to input even the most complex equations.
\author PETER J. ↑{BOEHM}, {\sl Software and Hardware Considerations %
for a\break Technical Typesetting System\/} (1976)
% in {\sl IEEE Transactions on Professional Communication\/} PC-19, pp15--19
\eject
\beginchapter Chapter 18. Fine Points of\\Mathematics Typing
We have discussed most of the facilities needed to construct math
formulas, but there are several more things a good mathematical typist
will want to watch for. After you have typed a dozen or so formulas using
the basic ideas of Chapters 16 and@17, you will find that it's easy to
visualize the final appearance of a mathematical expression as you type
it. And once you have gotten to that level, there's only a little bit
more to learn before you are producing formulas as beautiful as any the
world has ever seen; tastefully applied touches of \TeX nique will add a
professional polish that works wonders for the appearance and readability
of the books and papers that you type. This chapter talks about such
tricks, and it also fills in a few gaps by mentioning some aspects of math
that didn't fit comfortably into@Chapters@16@and@17.
\subsection Punctuation. When a formula is followed by a ↑{period}, ↑{comma},
↑{semicolon}, ↑{colon}, ↑{question mark}, ↑{exclamation point}, etc., put the
↑{punctuation} {\sl after\/} the |$|, when the formula is in the text; but
put the punctuation {\sl before\/} the |$$| when the formula is displayed.
For example,
\begintt
If $x<0$, we have shown that $$y=f(x).$$
\endtt
\TeX's spacing rules within paragraphs work best when the
↑{punctuation marks} are not considered to be part of the formulas.
Similarly, don't ever type anything like
\begintt
for $x = a, b$, or $c$.
\endtt
It should be
\begintt
for $x = a$, $b$, or $c$.
\endtt
(Better yet, use a ↑{tie}: `|or@$c$|'.) \ The reason is that \TeX\ will
typeset expression `|$x|@|=|@|a,|@|b$|' as a single formula, so it will
put a ``↑{thin space}'' between the comma and the $b$. This space will
not be the same as the space that \TeX\ puts after the
comma {\sl after\/} the $b$, since spaces between words are always bigger than
thin spaces. Such unequal spacing looks bad, but when you type things right
the spacing will look good.
Another reason for not typing `|$x| |=| |a,| |b$|' is that it inhibits the
possibilities for breaking lines in a paragraph: \TeX\ will never break at
the space between the comma and the |b| because breaks after commas in
formulas are usually wrong. For example, in the equation
`|$x|@|=|@|f(a,|@|b)$|' we certainly don't want to put `$x=f(a,$' on one
line and `$b)$' on the next.
Thus, when typing formulas in the text of a paragraph, keep the math properly
segregated: Don't take operators like $-$ and $=$ outside of the |$|'s,
and keep commas inside the formula if they are truly part of the formula.
But if a comma or period or other punctuation mark belongs linguistically
to the sentence rather than to the formula, leave it outside the |$|'s.
\exercise Type this: $R(n,t)=O(t↑{n/2})$, as $t\rightarrow0↑+$.
\answer |$R(n,t)=O(t↑{n/2})$, as $t\rightarrow0↑+$.| \
(N.B.: `|O(|', not `|0(|'.)
\danger Some mathematical styles insert a bit of extra space around
formulas to separate them from the text. For example, when copy is
being produced on an ordinary typewriter that doesn't have italic
letters, the best technical typists have traditionally put an extra
blank space before and after each formula, because this provides a
useful visual distinction. You might find it helpful to think of
each |$| as a symbol that has the potential of adding a little space
to the printed output; then the rule about excluding sentence
punctuation from formulas may be easier to remember.
\ddanger \TeX\ does, in fact, insert additional ↑{space} before and after each
formula; the amount of such space is called ↑{*mathsurround}, which is
a \<dimen>-valued parameter. For example, if you set |\mathsurround=1pt|,
each formula will effectively be 2@points wider ($1\pt$ at each side):
$$\baselineskip13pt\halign{\indent\mathsurround=#pt
For $x=a$, $b$, or $c$.\hfil&\quad(#)\hfil\cr
1&|\mathsurround=1pt|\cr 0&|\mathsurround=0pt|\cr}$$
This extra space will disappear into the left or right margin if the formula
occurs at the beginning or end of a line. The value of\/ |\mathsurround| that
is in force when \TeX\ reads the closing |$| of a formula is used at both
left and right of that formula. Plain \TeX\ takes |\mathsurround=0pt|, so
you won't see any extra space unless you are using some other format, or
unless you change |\mathsurround| yourself.
\subsection Non-italic letters in formulas. The names of algebraic variables
are usually italic or Greek letters, but common mathematical functions
like `log' are always set in ↑{roman type}. The best way to deal with such
constructions is to make use of the following 32@control sequences (all of
which are defined in plain \TeX\ format, see Appendix@B):
\begintt
\arccos \cos \csc \exp \ker \limsup \min \sinh
\arcsin \cosh \deg \gcd \lg \ln \Pr \sup
\arctan \cot \det \hom \lim \log \sec \tan
\arg \coth \dim \inf \liminf \max \sin \tanh
\endtt
↑(:arccos) ↑(:cos) ↑(:csc) ↑(:exp) ↑(:ker) ↑(:limsup) ↑(:min) ↑(:sinh)
↑(:arcsin) ↑(:cosh) ↑(:deg) ↑(:gcd) ↑(:lg) ↑(:ln) ↑(:Pr) ↑(:sup)
↑(:arctan) ↑(:cot) ↑(:det) ↑(:hom) ↑(:lim) ↑(:log) ↑(:sec) ↑(:tan)
↑(:arg) ↑(:coth) ↑(:dim) ↑(:inf) ↑(:liminf) ↑(:max) ↑(:sin) ↑(:tanh)
These control sequences lead to roman type with appropriate
spacing:\def\sep{&\hskip-1em}
\beginlongmathdemo
\it Input\sep\it Output\cr
\noalign{\vskip2pt}
|$\sin2\theta=2\sin\theta\cos\theta$|\sep\sin2\theta=2\sin\theta\cos\theta\cr
|$O(n\log n\log\log n)$|\sep O(n\log n\log\log n)\cr
|$\Pr(X>x)=\exp(-x/\mu)$|\sep\Pr(X>x)=\exp(-x/\mu)\cr
|$$\max_{1\le n\le m}\log_2P_n$$|\sep
\displaystyle{\max_{1\le n\le m}\log_2P_n}\cr
\noalign{\vskip2pt}
|$$\lim_{x\to0}{\sin x\over x}=1$$|\sep\displaystyle{\lim_{x\to0}
{\sin x\over x}=1}\cr
\endmathdemo
↑(:mu)
The last two formulas, which are displays, show that some of the special
control sequences are treated by \TeX\ as ``large operators'' with limits
just like $\sum$: the subscript on |\max| is not treated like the subscript
on |\log|. Subscripts and superscripts will become limits
when they are attached to |\det|, |\gcd|, |\inf|, |\lim|, |\liminf|,
|\limsup|, |\max|, |\min|, |\Pr|, and |\sup|, in display style.
\exercise Express the following display in plain \TeX\ language, using
`↑{:nu}' for `$\nu$':
$$p_1(n)=\lim_{m\to\infty}\sum_{\nu=0}↑\infty
\bigl(1-\cos↑{2m}(\nu!↑n\pi/n)\bigr).$$
\answer |$$p_1(n)=\lim_{m\to\infty}\sum_{\nu=0}↑\infty|\par
| \bigl(1-\cos↑{2m}(\nu!↑n\pi/n)\bigr).$$|\par
\smallskip\noindent $\bigl[$Mathematicians may enjoy
interpreting this formula; cf.@G.@H. ↑{Hardy},
{\sl Messenger of Mathematics\/ \bf35} (1906), 145--146.$\bigr]$
\danger If you need roman type for some mathematical function or operator
that isn't included in plain \TeX's list of@32, it is easy to define a
new control sequence by mimicking the definitions in Appendix@B\null. Or, if
you need roman type just for a ``one shot'' use, it is even easier to
get what you want by switching to ↑{:rm} type, as follows:
\beginlongmathdemo
|$\sqrt{{\rm Var}(X)}$|&\sqrt{{\rm Var}(X)}\cr
|$x_{\rm max}-x_{\rm min}$|&x_{\rm max}-x_{\rm min}\cr
|${\rm LL}(k)\Rightarrow{\rm LR}(k)$|&{\rm LL}(k)\Rightarrow{\rm LR}(k)\cr
|$\exp(x+{\rm constant})$|&\exp(x+{\rm constant})\cr
|$x↑3+{\rm lower\ order\ terms}$|&x↑3+{\rm lower\ order\ terms}\cr
\endmathdemo
Notice the uses of `|\|\]' ↑(escape space) in the last case;
without them, the result would have been `$x↑3+{\rm lower order terms}$',
because ordinary blank ↑{spaces} are ignored in math mode.
\danger You can also use ↑{*hbox} instead of\/ |\rm| to get roman letters
into formulas. For example, four of the last five formulas can be
generated by
\beginlongmathdemo
|$\sqrt{\hbox{Var}(X)}$|&\sqrt{\hbox{Var}(X)}\cr
|$\hbox{LL}(k)\Rightarrow\hbox{LR}(k)$|&\hbox{LL}(k)\Rightarrow\hbox{LR}(k)\cr
|$\exp(x+\hbox{constant})$|&\exp(x+\hbox{constant})\cr
|$x↑3+\hbox{lower order terms}$|&x↑3+\hbox{lower order terms}\cr
\endmathdemo
In this case `|\|\]' isn't necessary, because the material in an
|\hbox| is processed in horizontal mode, when spaces are significant.
But such uses of\/ |\hbox| have two disadvantages: \ (1)@The contents of the
box will be typeset in the same size, whether or not the box occurs as a
subscript; for example, `|$x_{\hbox{max}}$|' yields `$x_{\hbox{max}}$'.
\ (2)@The font that's used inside |\hbox| will be the ``↑{current font},''
so it might not be roman. For example, if you are typesetting the statement
of some theorem that is in slanted type, and if that theorem refers
to `|$\sqrt{\hbox{Var}(X)}$|', you will get the unintended result
`{\sl$\sqrt{\hbox{Var}(X)}$}'. In order to make sure that an |\hbox| uses
roman type, you need to specify |\rm|, e.g., `|$\sqrt{\hbox{\rm Var}(X)}$|';
and then the |\hbox| serves no purpose. We will see later, however, that
|\hbox| can be very useful in displayed formulas.
\ddangerexercise When the displayed formula
`|$$\lim_{n\to\infty}x_n {\rm\ exists} \iff|\break
|\limsup_{n\to\infty}x_n = \liminf_{n\to\infty}x_n.$$|' is typeset with
the standard macros of plain \TeX, you get
$$\lim_{n\to\infty}x_n{\rm\ exists}\iff
\limsup_{n\to\infty}x_n=\liminf_{n\to\infty}x_n.$$
But some people prefer a different notation: Explain how you could change
the definitions of\/ ↑{:limsup} and ↑{:liminf} so that the display would be
$$
\def\limsup{\mathop{\overline{\rm lim}}}
\def\liminf{\mathop{\underline{\rm lim}}}
\lim_{n\to\infty}x_n{\rm\ exists}\iff
\limsup_{n\to\infty}x_n=\liminf_{n\to\infty}x_n.$$
\answer |\def\limsup{\mathop{\overline{\rm lim}}}|\par
|\def\liminf{\mathop{\underline{\rm lim}}}|\par
\smallskip\noindent
[Notice that the limits `$n\to\infty$' appear at different levels, in both
of the displays, because `sup' and the underbar descend below the baseline.
It is possible to unify the limit positions by using ↑{phantoms}, as explained
later in this chapter. For example,
\begintt
\def\limsup{\mathop{\vphantom{\underline{}}\overline{\rm lim}}}
\endtt
would give lower limits in the same position as |\liminf|.]
\danger The word `mod' is also generally set in roman type, when it occurs
in formulas; but this word needs more care, because it is used in two
different ways that require two different treatments.
Plain \TeX\ provides two different control sequences,
↑{:bmod} and ↑{:pmod}, for the two cases: |\bmod| is to be used when
`mod' is a ↑{binary operation} (i.e., when it occurs between two quantities,
like a plus sign usually does), and |\pmod| is to be used when
`mod' occurs parenthetically at the end of a formula. For example,
\beginmathdemo
|$\gcd(m,n)=\gcd(n,m\bmod n)$|&\gcd(m,n)=\gcd(n,m\bmod n)\cr
|$x\equiv y+1\pmod{m↑2}$|&x\equiv y+1\pmod{m↑2}\cr
\endmathdemo
The `|b|' in `|\bmod|' stands for ``binary''; the `|p|' in `|\pmod|' stands
for ``parenthesized.'' Notice that |\pmod| inserts its own parentheses;
the quantity that appears after `mod' in the parentheses should be
enclosed in braces, if it isn't a single symbol.
\dangerexercise What did poor B. L. ↑{User} get when he typed
`|$x\equiv0 (\pmod y↑n)$|'\thinspace?
\answer $x\equiv0(\pmod y↑n)$. He should have typed
`|$x\equiv0\pmod{y↑n}$|'.
\dangerexercise Explain how to produce \lower12pt\null\
$\smash{\displaystyle{n\choose k}\equiv{\lfloor n/p\rfloor\choose
\lfloor k/p\rfloor}{n\bmod p\choose k\bmod p}\pmod p.}$
\answer |$${n\choose k}\equiv{\lfloor n/p\rfloor\choose|\par
| \lfloor k/p\rfloor}{n\bmod p\choose k\bmod p}\pmod p.$$|
\danger The same mechanism that works for roman type in formulas can be used
to get other styles of type as well. For example, ↑{:bf} yields ↑{boldface}:
\beginmathdemo
|$\bf a+b=\Phi_m$|&\bf a+b=\Phi_m\cr
\endmathdemo
Notice that the whole formula didn't become emboldened in this example; the
`$+$' and `$=$' stayed the same. Plain \TeX\ sets things up so
that commands like |\rm| and |\bf| will affect only the upper-case letters
|A| to@|Z|, the lower-case letters |a| to@|z|, the digits |0| to@|9|,
the upper-case Greek letters |\Gamma| to@|\Omega|, and math ↑{accents}
like ↑{:hat} and ↑{:tilde}. Incidentally, no braces were used in this
example, because |$|'s have the effect of grouping; |\bf| changes the
current font, but the change is local, so it does not affect the font that
was current outside the formula.
\ddanger The bold fonts available in plain \TeX\ are ``bold roman,'' rather
than ``bold italic,'' because the latter are rarely needed. However, \TeX\
could readily be set up to make use of bold math italics, if desired
(see Exercise 17.\bmiexno). A more extensive set of math fonts would also
include ↑{script}, ↑{Fraktur}, and ``↑{blackboard bold}'' styles; plain
\TeX\ doesn't have these, but other formats like \AmSTeX\ do. ↑(AMS-TeX)
↑(German black letters)
\danger Besides |\rm| and |\bf|, you can say ↑{:cal} in formulas to get
upper-case letters in a ``↑{calligraphic}'' style. For example, `|$\cal
A$|' produces `$\cal A$' and `|$\cal Z$|' produces `$\cal Z$'. But beware:
This works only with the letters |A| to |Z|; you'll get weird results if
you apply |\cal| to lower-case or Greek letters.
\danger There's also ↑{:mit}, which stands for ``↑{math italic}.'' This
affects ↑{upper-case Greek}, so that you get
$\mit(\Gamma,\Delta,\Theta,\Lambda,\Xi,\Pi,\Sigma,\Upsilon,\Phi,\Psi,\Omega)$
instead of $(\Gamma,\ldots,\Omega)$. When@|\mit| is in effect, the
ordinary letters |A| to |Z| and |a| to |z| are not changed; they are set
in italics as usual, because they ordinarily come from the math italic
font. Conversely, upper-case Greek letters and math accents are unaffected
by |\rm|, because they ordinarily come from the roman font. Math accents
should not be used in conjunction with |\mit|, because the math italic
font contains no accents.
\dangerexercise Type the formula ${\bf\bar x}↑T\bf Mx={\rm0}\iff x=0$,
using as few keystrokes as possible. ↑(boldface numbers in math)
\answer |${\bf\bar x}↑T\bf Mx={\rm0}\iff x=0$|. \ (Another solution is
`|$\bf\bar x↑{\mit T}Mx={\rm0}\iff x=0$|', but this needs one more keystroke.)
\dangerexercise Figure out how to typeset
`$S\subseteq\mit\Sigma\iff S\in\cal S$'.
\answer |$S\subseteq{\mit\Sigma}\iff S\in{\cal S}$|. In this case the
braces are redundant and could be eliminated; but you shouldn't try to do
{\sl everything\/} with fewest keystrokes, or you'll outsmart yourself
some day.
\danger Plain \TeX\ also allows you to type ↑{:it}, ↑{:sl}, or ↑{:tt}, if
you want text italic, slanted, or typewriter letters to occur in a math
formula. However, these fonts are available only in text size, so you
should not try to use them in subscripts.
\danger If you're paying attention, you probably wonder why both
|\mit| and |\it| are provided; the answer is that |\mit| is ``math italic''
(which is normally best for formulas), and |\it| is ``text italic'' (which
is normally best for running text).
\beginmathdemo
|$This\ is\ math\ italic.$|&This\ is\ math\ italic.\cr
|{\it This is text italic.}|&\hbox{\it This is text italic.}\cr
\endmathdemo
The math italic letters are a little wider, and the spacing is different;
this works better in most formulas, but it fails spectacularly when
you try to type certain italic words like `$different$' using math mode
(`|$different$|'). A wide `$f$' is usually desirable in formulas, but it
is undesirable in text. Therefore wise typists
use |\it| in a math formula that is supposed
to contain an actual italic word. Such cases almost never occur in
classical mathematics, but they are common when ↑{computer programs}
are being typeset, since programmers often use multi-letter ``↑{identifiers}'':
\beginmathdemo
|$\it last:=first$|&\it last:=first\cr
|$\it x\_coord(point\_4)$|&\it x\_coord(point\_4)\cr
\endmathdemo
The first of these examples shows that \TeX\ recognizes the ↑{ligature}
`{\it fi\/}' when text italic occurs in a math formula;
the other example illustrates the use of short ↑{underlines} to break
up identifier names. ↑(control-underline)
When the author typeset this manual, he used `|$\it SS$|' to refer to
style@$\SS$, since `|$SS$|' makes the $S$'s too far apart: $SS$.
\dangerexercise What plain \TeX\ commands will produce the following display?
$$\tenmath
{\it available}+\sum_{i=1}↑n\max\bigl({\it full}(i),{\it reserved}(i)\bigr)
={\it capacity}.$$
\answer |$${\it available}+\sum_{i=1}↑n\max\bigl({\it full}(i),|\par
|{\it reserved}(i)\bigr)={\it capacity}.$$|
\smallskip\noindent [If\/ |\it| had been used throughout
the formula, the subscript@$i$ and superscript@$n$ would have caused error
messages saying `↑{*scriptfont} |4| |is| |undefined|',
since plain \TeX\ makes |\it| available only in text size.]
\ddangerexercise How would you go about typesetting the following computer
program, using the macros of plain \TeX?
$$\vbox{\let\par=\endgraf
\obeylines\sfcode`;=3000
{\bf for $j:=2$ step $1$ until $n$ do}
\quad {\bf begin} ${\it accum}:=A[j]$; $k:=j-1$; $A[0]:=\it accum$;
\quad {\bf while $A[k]>\it accum$ do}
\qquad {\bf begin} $A[k+1]:=A[k]$; $k:=k-1$;
\qquad {\bf end};
\quad $A[k+1]:=\it accum$;
\quad {\bf end}.
}$$
\answer |{\obeylines \sfcode`;=3000|↑(*sfcode)\par
|{\bf for $j:=2$ step $1$ until $n$ do}|\par
|\quad {\bf begin} ${\it accum}:=A[j]$; $k:=j-1$; $A[0]:=\it accum$;|\par
|\quad {\bf while $A[k]>\it accum$ do}|\par
|\qquad {\bf begin} $A[k+1]:=A[k]$; $k:=k-1$;|\par
|\qquad {\bf end};|\par
|\quad $A[k+1]:=\it accum$;|\par
|\quad {\bf end}.}|\par
\smallskip\noindent
[This is something like the ``poetry'' example in Chapter@14, but much
more difficult. Some manuals of style say that ↑{punctuation} should inherit
the font of the preceding character, so that three kinds of semicolons
should be typeset; e.g., these experts recommend `$k:=j-1$; \
$A[0]:={}${\it accum;} \ {\bf end;}'. The author heartily disagrees.]
\subsection Spacing between formulas. ↑{Displays} often contain more than one
formula; for example, an equation is frequently accompanied by a ↑{side
condition}:
$$F_n=F_{n-1}+F_{n-2},\qquad n\ge2.$$
In such cases you need to tell \TeX\ how much space to put after the comma,
because \TeX's normal spacing conventions would bunch things together;
without special precautions you would get
$$F_n=F_{n-1}+F_{n-2}, n\ge2.$$
The traditional hot-metal technology for printing has led to some ingrained
standards for situations like this, based on what printers call a ``↑{quad}''
of space. Since these standards seem to work well in practice, \TeX\ makes
it easy for you to continue the tradition: When you type `↑{:quad}' in plain
\TeX\ format, you get a printer's quad of space in the horizontal direction.
Similarly, `↑{:qquad}' gives you a double quad (twice as much); this
is the normal spacing for situations like
the $F_n$ example above. Thus, the recommended procedure is to type
\begintt
$$ F_n = F_{n-1} + F_{n-2}, \qquad n \ge 2. $$
\endtt
It is perhaps worth reiterating that \TeX\ ignores all the spaces in math
mode (except, of course, the space after `|\qquad|', which is needed
to distinguish between `|\qquad|@|n|' and `|\qquadn|'); so the same result
would be obtained if you were to leave out all but one space:
\begintt
$$F_n=F_{n-1}+F_{n-2},\qquad n\ge2.$$
\endtt
Whenever you want spacing that differs from the normal conventions, you must
specify it explicitly by using control sequences such as |\quad| and |\qquad|.
\danger A quad used to be a square piece of blank type, $1\,$em wide and $1\,$em
tall---approximately the size of a capital M, as explained in Chapter@10. This
tradition has not been fully retained: The control sequence |\quad| in plain
\TeX\ is simply an abbreviation for `|{\hskip|@|1|↑{.em}|}|', so \TeX's quad has
width but no height.
\danger You can use |\quad| in text as well as in formulas; for example,
Chapter@14 illustrates how |\quad| applies to poetry. When |\quad| appears
in a formula it stands for one@em in the current text font, independent of the
current math size or style or family. Thus, for example, |\quad| is just
as wide in a subscript as it is on the main line of a formula.
Sometimes a careless author will put two formulas next to each other in
the text of a paragraph. For example, you might find a sentence like this:
$$\displaybox{The Fibonacci numbers satisfy
$F_n=F_{n-1}+F_{n-2}$, \ $n\ge2$.}$$
↑(Fibonacci)
Everybody who teaches proper ↑{mathematical} ↑{style} is agreed that formulas
ought to be separated by words, not just by commas; the author of that
sentence should at least have said `for $n\ge2$', not simply `$n\ge2$'.
But alas, such lapses are commonplace, and many prominent mathematicians
are hopelessly addicted to clusters of formulas. If we are not allowed to change
their writing style, we can at least insert extra space where they
neglected to insert an appropriate word. An additional interword space
generally works well in such cases; for example, the sentence above was
typeset thus:
\begintt
... $F_n=F_{n-1}+F_{n-2}$, \ $n\ge2$.
\endtt
The `|\|\]' ↑(escape space) here gives a visual separation that
partly compensates for the bad style.
\exercise Put the following paragraph into \TeX\ form, treating punctuation
and spacing carefully; also insert ↑{ties} to prevent bad line breaks.
$$\baselineskip13pt\displayvbox{\raggedright\hsize=310pt\parindent=0pt
Let $H$@be a Hilbert space, \
$C$@a closed bounded convex subset of@$H$, \
$T$@a nonexpansive self map of@$C$.
Suppose that as $n\to\infty$, \ $a_{n,k}\to0$ for each@$k$,
and $\gamma_n=\sum_{k=0}↑\infty(a_{n,k+1}-a_{n,k})↑+\to0$.
Then for each $x$@in@$C$, \
$A_nx=\sum_{k=0}↑\infty a_{n,k}T↑kx$ converges weakly
to a fixed point of@$T$.
}$$ % taken from Bull. AMS 82 (1976), p 959; chosen by AMS in '78 for demo
\answer |Let $H$@be a Hilbert space, \
$C$@a closed bounded convex subset of@$H$, \
$T$@a nonexpansive self map of@$C$.
Suppose that as $n\to\infty$, \ $a_{n,k}\to0$ for each@$k$,
and $\gamma_n=\sum_{k=0}↑\infty(a_{n,k+1}-a_{n,k})↑+\to0$.
Then for each $x$@in@$C$, \
$A_nx=\sum_{k=0}↑\infty a_{n,k}T↑kx$ converges weakly
to a fixed point of@$T$.|\par
[If any mathematicians are reading this, they might either appreciate
or resent the following attempt to edit the given paragraph
into a more acceptable style: ``%
Let $C$@be a closed, bounded, convex subset of a Hilbert space@$H$,
and let $T$@be a nonexpansive self map of@$C$.
Suppose that as $n\to\infty$, we have $a_{n,k}\to0$ for each@$k$,
and $\gamma_n=\sum_{k=0}↑\infty(a_{n,k+1}-a_{n,k})↑+\to0$.
Then for each $x$@in@$C$, the infinite sum
$A_nx=\sum_{k=0}↑\infty a_{n,k}T↑kx$ converges weakly
to a fixed point of@$T$.'']
\subsection Spacing within formulas. Chapter 16 says that \TeX\ does
automatic ↑{spacing} of math formulas so that they look right, and this is
almost true. But occasionally you must give \TeX\ some help. The number of
possible math formulas is vast, and \TeX's spacing rules are rather
simple, so it is natural that exceptions should arise. Of course, it is
desirable to have fine units of spacing for this purpose, instead of the
big chunks that arise from `|\|\]', |\quad| and |\qquad|.
The basic elements of space that \TeX\
puts into formulas are called {\sl ↑{thin spaces}}, {\sl ↑{medium
spaces}}, and {\sl ↑{thick spaces}}. In order to get a feeling for these
units, let's take a look at the $F_n$ example again: thick spaces occur
just before and after the = sign, and also before and after the $\ge$;
medium spaces occur just before and after the $+$ sign. Thin spaces are
slightly smaller, but noticeable; it's a thin space that makes the
difference between `loglog' and `$\log\log$'. The normal space between
words of a paragraph is approximately equal to two thin spaces.
\TeX\ inserts thin spaces, medium spaces, and thick spaces into formulas
automatically, but you can add your own spacing whenever you want to,
by using the control sequences
$$\halign{\indent#\hfil&\quad#\hfil\cr
|\,|&thin space \ (normally 1/6 of a quad);\cr
|\>|&medium space \ (normally 2/9 of a quad);\cr
|\;|&thick space \ (normally 5/18 of a quad);\cr
|\!|&negative thin space \ (normally $-1/6$ of a quad).\cr}$$
In most cases you can rely on \TeX's spacing while you are typing a manuscript,
and you'll want to insert or delete space with these four control sequences
only in rare circumstances after you see what comes out.
\ddanger We observed a minute ago that |\quad| spacing does not
change with the style of formula, nor does it depend on the math font
families that are being used. But thin spaces, medium spaces, and thick
spaces do get bigger and smaller as the size of type gets bigger and
smaller; this is because they are defined in terms of ↑{<muglue}, a@special
brand of glue intended for math spacing. You specify \<muglue> just
as if it were ordinary glue, except that the units are given in terms of
`↑{.mu}' (math units) instead of@|pt| or@|cm| or something else. For
example, Appendix@B contains the definitions
\begintt
\thinmuskip = 3mu
\medmuskip = 4mu plus 2mu minus 4mu
\thickmuskip = 5mu plus 5mu
\endtt
↑(*thinmuskip)↑(*medmuskip)↑(*thickmuskip)
and this defines the thin, medium, and thick spaces that \TeX\ inserts
into formulas. According to these specifications, thin spaces in plain
\TeX\ do not stretch or shrink; medium spaces can stretch a little, and
they can shrink to zero; thick spaces can stretch a lot, but they never shrink.
\ddanger There are 18 mu to an em, where the em is taken from family@2
(the math symbols family). In other words, ↑{*textfont}@|2| defines the em
value for |mu| in display and text styles; ↑{*scriptfont}@|2| defines the
em for script size material; and ↑{*scriptscriptfont}@|2| defines it for
scriptscript size.
\ddanger You can insert math glue into any formula just by giving
the command `↑{*mskip}\<muglue>'. For example, `|\mskip 9mu plus 2mu|'
inserts one half em of space, in the current size, together with some
stretchability. Appendix@B defines `|\,|' to be an abbreviation for
`|\mskip\the\thinmuskip|'. Similarly, you can use the command `↑{*mkern}'
when there is no stretching or shrinking; `|\mkern18mu|' gives one em of
horizontal space in the current size. \TeX\ insists that |\mskip| and
|\mkern| be used only with |mu|; conversely, ↑{*hskip} and ↑{*kern} (which
are also allowed in formulas) must never give units in |mu|.
Formulas involving ↑{calculus} look best when an extra thin space appears
before $dx$ ↑(dx) or@$dy$ or@$d\,$whatever; but \TeX\ doesn't do this
automatically. Therefore a well-trained typist will remember to insert
`|\,|' in examples like the following:
\beginmathdemo
\it Input&\it Output\cr
\noalign{\vskip2pt}
|$\int_0↑\infty f(x)\,dx$|&\int_0↑\infty f(x)\,dx\cr
|$y\,dx-x\,dy$|&y\,dx-x\,dy\cr
|$dx\,dy=r\,dr\,d\theta$|&dx\,dy=r\,dr\,d\theta\cr
|$x\,dy/dx$|&x\,dy/dx\cr \endmathdemo Notice that no `|\,|' was desirable
after the `|/|' in the last example. Similarly, there's no need for
`|\,|' in cases like
\begindisplaymathdemo
|$$\int_1↑x{dt\over t}$$|&\int_1↑x{dt\over t}\cr
\endmathdemo
since the $dt$ appears all by itself in the numerator of a fraction; this
detaches it visually from the rest of the formula.
\exercise Explain how to handle the display
$$\int_0↑\infty{t-ib\over t↑2+b↑2}e↑{iat}\,dt=e↑{ab}E_1(ab),\qquad a,b>0.$$
\answer |$$\int_0↑\infty{t-ib\over t↑2+b↑2}e↑{iat}\,dt=|\par
| e↑{ab}E_1(ab),\qquad a,b>0.$$|
\danger When physical ↑{units} appear in a formula, they should be set in roman
type and separated from the preceding material by a thin space:
\beginmathdemo
|$55\rm\,mi/hr$|&55\rm\,mi/hr\cr
|$g=9.8\rm\,m/sec↑2$|&g=9.8\rm\,m/sec↑2\cr
|$\rm1\,ml=1.000028\,cc$|&\rm1\,ml=1.000028\,cc\cr
\endmathdemo
\dangerexercise Typeset the following display, assuming that `↑{:hbar}'
generates `$\hbar$':
$$\hbar=1.0545\times10↑{-27}\rm\,erg\,sec.$$
\answer |$$\hbar=1.0545\times10↑{-27}\rm\,erg\,sec.$$|
\danger Thin spaces should also be inserted after ↑{exclamation points}
(which stand for the ``↑{factorial}'' operation in a formula), if the next
character is a letter or a number or an opening delimiter:
\beginmathdemo
|$(2n)!/\bigl(n!\,(n+1)!\bigr)$|&(2n)!/\bigl(n!\,(n+1)!\bigr)\cr
\noalign{\vskip6pt}
|$${52!\over13!\,13!\,26!}$$|&\displaystyle{52!\over13!\,13!\,26!}\cr
\endmathdemo
Besides these cases, you will occasionally encounter formulas in which
the symbols are bunched up too tightly, or where too much white space
appears, because of certain unlucky combinations of shapes. It's usually
impossible to anticipate optical glitches like this until you see the first
proofs of what you have typed; then you get to use your judgment about how
to add finishing touches that provide extra beauty, clarity, and finesse.
A tastefully applied `|\,|' or `|\!|' will open things up or close things
together so that the reader won't be distracted from the mathematical
significance of the formula. ↑{Square root} signs and ↑{multiple integrals} are
often candidates for such fine tuning. Here are some examples of situations
to look out for:
\beginmathdemo
|$\sqrt2\,x$|&\sqrt2\,x\cr
|$\sqrt{\,\log x}$|&\sqrt{\,\log x}\cr
|$O\bigl(1/\sqrt n\,\bigr)$|&O\bigl(1/\sqrt n\,\bigr)\cr
|$[\,0,1)$|&[\,0,1)\cr
|$\log n\,(\log\log n)↑2$|&\log n\,(\log\log n)↑2\cr
|$x↑2\!/2$|&x↑2\!/2\cr
|$n/\!\log n$|&n/\!\log n\cr
|$\Gamma_{\!2}+\Delta↑{\!2}$|&\Gamma_{\!2}+\Delta↑{\!2}\cr
|$R_i{}↑j{}_{\!kl}$|&R_i{}↑j{}_{\!kl}\cr
|$\int_0↑x\!\int_0↑y dF(u,v)$|&\int_0↑x\!\int_0↑y dF(u,v)\cr
\noalign{\vskip6pt}
|$$\int\!\!\!\int_D dx\,dy$$|&\displaystyle{\int\!\!\!\int_D dx\,dy}\cr
\endmathdemo
↑(:Gamma)↑(:Delta)↑(:intint)
In each of these formulas the omission of\/ |\,| or |\!| would lead to
somewhat less satisfactory results.
\ddanger Most of these examples where thin-space corrections are desirable
arise because of chance coincidences. For example, the superscript in
|$x↑2/2$| leaves a hole before the slash ($x↑2/2$); a negative thin
space helps to fill that hole. The positive thin space in |$\sqrt{\,\log x}$|
compensates for the fact that `$\log x$' begins with a thin, unslanted
letter; and so on. But two of the examples involve corrections that
were necessary because \TeX\ doesn't really know a great deal about
mathematics: \ (1)@In the formula
|$\log n(\log\log n)↑2$|, \TeX\ inserts no thin space before the left
parenthesis, because there are similar formulas like |$\log n(x)$| where
no such space is desired. \ (2)@In the formula |$n/\log n$|, \TeX\
automatically inserts an unwanted thin@space before |\log|, since the slash is
treated as an ordinary symbol, and since a@thin space is usually called for
between an ordinary symbol and an operator like |\log|.
\ddanger In fact, \TeX's rules for spacing in formulas are fairly simple.
A formula is converted to a math list as described at the end of Chapter@17,
and the math list consists chiefly of ``↑{atoms}'' of eight basic types:
↑{Ord}@(↑{ordinary}), ↑{Op}@(↑{large operator}), ↑{Bin}@(↑{binary operation}),
↑{Rel}@(↑{relation}), ↑{Open}@(↑{opening}), ↑{Close}@(↑{closing}),
↑{Punct}@(punctuation), ↑(punctuation) and ↑{Inner}@(a delimited
subformula). Other kinds of atoms, which arise from commands like
↑{*overline} or ↑{*mathaccent} or ↑{*vcenter}, etc., are all treated as
type@Ord; ↑{fractions} are treated as type@Inner. The following table is
used to determine the spacing between pairs of adjacent atoms:
$$\baselineskip0pt\lineskip0pt
\halign to\the\hsize
{\strut\hbox to\the\parindent{\it#\hfil}& % for the legend "Left atom"
#\hfil\quad& % for the row labels
#\hfil\tabskip 0pt plus 10pt& % for the rule at the left
\hbox to 25pt{\tt\hss#\hss}& % for column 1
\hbox to 25pt{\tt\hss#\hss}& % for column 2
\hbox to 25pt{\tt\hss#\hss}& % for column 3
\hbox to 25pt{\tt\hss#\hss}& % for column 4
\hbox to 25pt{\tt\hss#\hss}& % for column 5
\hbox to 25pt{\tt\hss#\hss}& % for column 6
\hbox to 25pt{\tt\hss#\hss}& % for column 7
\hbox to 25pt{\tt\hss#\hss}& % for column 8
#\hfil\tabskip0pt\cr % for the rule at the right
\noalign{\vskip-6pt} % it just happens that there's extra white space
&&&&\multispan7\hss\it Right atom\hss\cr
\noalign{\vskip3pt}
&&&\rm Ord&\rm Op&\rm Bin&\rm Rel&\rm Open&\rm Close&\rm Punct&\rm Inner\cr
\noalign{\vskip2pt}
\omit&&\multispan{10}\leaders\hrule\hfil\cr
\omit\vbox to 2pt{}&&\vrule&&&&&&&&&\vrule\cr
&Ord&\vrule&0&1&(2)&(3)&0&0&0&(1)&\vrule\cr
&Op&\vrule&1&1&*&(3)&0&0&0&(1)&\vrule\cr
&Bin&\vrule&(2)&(2)&*&*&(2)&*&*&(2)&\vrule\cr
Left&Rel&\vrule&(3)&(3)&*&0&(3)&0&0&(3)&\vrule\cr
atom&Open&\vrule&0&0&*&0&0&0&0&0&\vrule\cr
&Close&\vrule&0&1&(2)&(3)&0&0&0&(1)&\vrule\cr
&Punct&\vrule&(1)&(1)&*&(1)&(1)&(1)&(1)&(1)&\vrule\cr
&Inner&\vrule&(1)&1&(2)&(3)&(1)&0&(1)&(1)&\vrule\cr
\omit\vbox to 2pt{}&&\vrule&&&&&&&&&\vrule\cr
\omit&&\multispan{10}\leaders\hrule\hfil\cr}$$
↑(spacing table) ↑(math spacing table)
Here 0, 1, 2, and 3 stand for no space, thin space, medium space, and
thick space, respectively; the table entry is parenthesized if the space
is to be inserted only in display and text styles, not in script and
scriptscript styles. For example, many of the entries in the Rel row
and the Rel column are `{\tt(3)}'; this means that thick spaces are normally
inserted before and after relational symbols like `=', but not in
subscripts. Some of the entries in the table are `{\tt*}'; such cases
never arise, because Bin atoms must be preceded and followed by atoms
compatible with the nature of binary operations. Appendix@G contains
precise details about how math lists are converted to horizontal lists;
this conversion is done whenever \TeX\ is about to leave math mode, and the
inter-atomic spacing is inserted at that time.
\ddanger For example, the displayed formula specification
\begintt
$$x+y=\max\{x,y\}+\min\{x,y\}$$
\endtt
will be transformed into the sequence of atoms
\def\\#1{\vbox to 33pt{\vbox to 22pt{\vfill\hrule
\hbox{\vrule\hskip-.4pt$#1$\hskip-.4pt\vrule}}\hrule\vfill}}%
$$\display\vbox{\vskip-11pt\hbox{$
\\x\;\;\\+\;\;\\y\;\;\\=\;\;\\\max\;\;\\\{\;\;
\\x\;\;\\,\;\;\\y\;\;\\\}\;\;\\+\;\;\\\min\;\;\\\{
\;\;\\x\;\;\\,\;\;\\y\;\;\\\}$}\vskip-11pt}$$
of respective types Ord, Bin, Ord, Rel, Op, Open, Ord, Punct, Ord, Close,
Bin, Op, Open, Ord, Punct, Ord, and Close.
Inserting spaces according to the table gives
$$\def\0{\thinspace}
\def\1{\thinspace{\tt\char`\\,}\thinspace}
\def\2{\thinspace{\tt\char`\\>}\thinspace}
\def\3{\thinspace{\tt\char`\\;}\thinspace}
\halign{\indent\hfil#\cr
Ord\2Bin\2Ord\3Rel\3Op\0Open\0Ord\0Punct\1Ord\0Close\qquad\cr
Bin\2Op\0Open\0Ord\0Punct\1Ord\0Close\cr}$$
and the resulting formula is
$$\vbox{\vskip-11pt\hbox{$
\\x\>\\+\>\\y\;\\=\;\\\max\\\{
\\x\\,\,\\y\\\}\>\\+\>\\\min\\\{
\\x\\,\,\\y\\\}$}\vskip-11pt}$$
i.e.,$$x+y=\max\{x,y\}+\min\{x,y\}\rlap{\quad.}$$
This example doesn't involve subscripts or superscripts; but subscripts and
superscripts merely get attached to atoms without changing the atomic type.
\ddangerexercise Use the table to determine what spacing \TeX\
will insert between the atoms of the formula `|$f(x,y)<x↑2+y↑2$|'.
\answer There are ten atoms (the first is $f$ and the last is $y↑2$); their types,
and the interatomic spacing, are respectively
$$\def\0{\thinspace}
\def\1{\thinspace{\tt\char`\\,}\thinspace}
\def\2{\thinspace{\tt\char`\\>}\thinspace}
\def\3{\thinspace{\tt\char`\\;}\thinspace}
\displaybox{Ord\0Open\0Ord\0Punct\1Ord\0Close\3Rel\3Ord\2Bin\2Ord.}$$
\ddanger The plain \TeX\ macros ↑{:bigl}, ↑{:bigr}, ↑{:bigm}, and ↑{:big}
all produce identical delimiters; the only difference between them is that
they may lead to different spacing, because they make the delimiter into
different types of atoms: |\bigl| produces an Open atom, |\bigr| a@Close,
|\bigm| a@Rel, and |\big| an@Ord. On the other hand, when a subformula
appears between ↑{*left} and ↑{*right}, it is typeset by itself and placed
into an Inner atom. Therefore it is possible that a subformula enclosed by
|\left| and |\right| will be surrounded by more space than there would be
if that subformula were enclosed by |\bigl| and@|\bigr|. For example, Ord
followed by Inner (from |\left|) gets a thin space, but Ord followed by
Open (from |\bigl|) does not. The rules in Chapter@17 imply that the
construction `↑{*mathinner}|{\bigl({|\<subformula>|}\bigr)}|' within any
formula produces a result exactly equivalent to
`|\left(|\<subformula>|\right)|', except that the ↑{delimiters} are forced to
be of the |\big| size regardless of the height and depth of the subformula.
\danger \TeX's spacing rules sometimes fail when `\vrt' and `|\|\vrt'
appear in a formula, because $\vert$ and $\|$ are treated as ordinary symbols
↑(verticalline)↑(:verticalline)
instead of as delimiters. For example, consider the formulas
\beginlongmathdemo
|$|\vrt|-x|\vrt|=|\vrt|+x|\vrt|$|&\vert-x\vert=\vert+x\vert\cr
|$\left|\vrt|-x\right|\vrt|=\left|\vrt|+x\right|\vrt|$|&
\left\vert-x\right\vert=\left\vert+x\right\vert\cr
|$\lfloor-x\rfloor=-\lceil+x\rceil$|&\lfloor-x\rfloor=-\lceil+x\rceil\cr
\endmathdemo
In the first case the spacing is wrong because \TeX\ thinks that the plus
sign is computing the sum of `$\vert$' and `$x$'. The use of\/ |\left| and
|\right| in the second example puts \TeX\ on the right track. The third
example shows that no such corrections are needed with other delimiters,
because \TeX\ knows whether they are openings or closings.
\ddangerexercise Some perverse mathematicians use ↑{brackets} backwards,
to denote ``↑{open intervals}.''
Explain how to type the following bizarre formula: % from MR53 #3451
$\left]-\infty,T\right[\times\left]-\infty,T\right[$.
\answer |$\left]-\infty,T\right[\times\left]-\infty,T\right[$|. \ (Or one could
say ↑{*mathopen} and ↑{*mathclose} instead of\/ |\left| and |\right|;
then \TeX\ would not choose the size of the delimiters, nor would it consider
the subformulas to be of type Inner.) \
% that formula was quoted from MR review of paper by Mario Marino
% in Ricerche Mat. 24 (1975), no.@1, 152--171
Open intervals are more clearly expressed in print
by using parentheses instead of reversed brackets; for example,
compare `$(-\infty,T)\times(-\infty,T)$' to the given formula.
\ddangerexercise Study Appendix G and determine what spacing will be used
in the formula `|$x++1$|'. Which of the plus signs will be regarded
as a ↑{binary operation}?
\answer The first |+| will become a Bin atom, the second an Ord; hence
the result is $x$, medium space, $+$, medium space, $+$, no space, 1.
\subsection Ellipses\/ {\rm(``three dots'')}. ↑(ellipses)
Mathematical copy looks much nicer if you are careful about how groups of
↑{three dots} are typed in formulas and text. Although it looks fine to
type `|...|' on a typewriter that has fixed spacing, the result looks too
crowded when you're using a printer's fonts: `|$x...y$|' results in
`$x...y$', and such close spacing is undesirable except in subscripts or
superscripts.
An ellipsis can be indicated by two different kinds of dots, one higher
than the other; the best mathematical traditions distinguish between
these two possibilities. It is generally correct to produce formulas like
$$\display x_1+\cdots+x_n\qquad {\rm and}\qquad (x_1,\ldots,x_n),$$
but wrong to produce formulas like
$$\display x_1+\ldots+x_n\qquad {\rm and}\qquad (x_1,\cdots,x_n).$$
The plain \TeX\ format of Appendix B allows you to solve the ``three dots''
problem very simply, and everyone will be envious of the beautiful formulas
that you produce. The idea is simply to type ↑{:ldots} when you want three
low dots@($\,\ldots\,$), and ↑{:cdots} when you want three vertically centered
dots@($\,\cdots\,$).
In general, it is best to use |\cdots| between $+$ and $-$ and@$\times$
signs, and also between $=$@signs or $\le$@signs or $\subset$ signs or other
similar relations. Low dots are used between ↑{commas}, and when things
are juxtaposed with no signs between them at all. For example:
\beginmathdemo
|$x_1+\cdots+x_n$|&x_1+\cdots+x_n\cr
|$x_1=\cdots=x_n=0$|&x_1=\cdots=x_n=0\cr
|$A_1\times\cdots\times A_n$|&A_1\times\cdots\times A_n\cr
|$f(x_1,\ldots,x_n)$|&f(x_1,\ldots,x_n)\cr
|$x_1x_2\ldots x_n$|&x_1x_2\ldots x_n\cr
|$(1-x)(1-x↑2)\ldots(1-x↑n)$|&(1-x)(1-x↑2)\ldots(1-x↑n)\cr
|$n(n-1)\ldots(1)$|&n(n-1)\ldots(1)\cr
\endmathdemo
\exercise Type the formulas `$x_1+x_1x_2+\cdots+x_1x_2\ldots x_n$' and
`$(x_1,\ldots,x_n)\cdot(y_1,\ldots,y_n)=x_1y_1+\cdots+x_ny_n$'.
\ [{\sl Hint:\/} A single raised dot is called `↑{:cdot}'.]
\answer |$x_1+x_1x_2+\cdots+x_1x_2\ldots x_n$| \ and\hfil\break
|$(x_1,\ldots,x_n)\cdot(y_1,\ldots,y_n)=x_1y_1+\cdots+x_ny_n$|.
But there's an important special case in which |\ldots| and |\cdots|
don't give the correct spacing, namely when they appear at the very end
of a formula, or when they appear just before a closing delimiter like@`|)|'.
In such situations an extra ↑{thin space} is needed. For example, consider
sentences like this:
$$\halign{\indent#\hfil\cr
Prove that $(1-x)↑{-1}=1+x+x↑2+\cdots\,$.\cr
Clearly $a_i<b_i$ for $i=1$,@2, $\ldots\,$,@$n$.\cr
The coefficients $c_0$,@$c_1$, \dots,@$c_n$ are positive.\cr}$$
To get the first sentence, the author typed
\begintt
Prove that $(1-x)↑{-1}=1+x+x↑2+\cdots\,$.
\endtt
Without the `↑{:,}' the period would have come too close to the |\cdots|.
Similarly, the second sentence was typed thus:
\begintt
Clearly $a_i<b_i$ for $i=1$,@2, $\ldots\,$,@$n$.
\endtt
Notice the use of ↑{ties}, which prevent bad line breaks as explained in
Chapter@14. Such ellipses are extremely common in some forms of mathematical
writing, so plain \TeX\ allows you to say just `↑{:dots}' as an abbreviation
for `|$\ldots\,$|' in the text of a paragraph. The third sentence
can therefore be typed
\begintt
The coefficients $c_0$,@$c_1$, \dots,@$c_n$ are positive.
\endtt
\exercise B. C. ↑{Dull} tried to take a shortcut by typing the
second example this way:
\begintt
Clearly $a_i<b_i$ for@$i=1, 2, \ldots, n$.
\endtt
What's so bad about that?
\answer The commas belong to the sentence, not to the formula; his
decision to put them into math mode meant that \TeX\ didn't put large
enough spaces after them. Also, his formula `$i=1, 2, \ldots, n$' allows
no breaks between lines, except after the $=$, so he's risking
overfull box problems. But suppose the sentence had been more terse:
$$\displaybox{Clearly $a_i<b_i$ \ ($i=1,2,\ldots,n$).}$$
Then his idea would be basically correct:
\begintt
Clearly $a_i<b_i$ \ ($i=1,2,\ldots,n$).
\endtt
\exercise How do you think the author typed the footnote in Chapter 4
of this book?
\answer $\ldots$ |never\footnote*{Well \dots, hardly ever.} have| $\ldots$
\subsection Line breaking. When you have formulas in a paragraph, \TeX\
may have to break them between lines. This is a necessary evil, something
like the hyphenation of words; we want to avoid it unless the alternative
is worse. ↑(line breaking in math) ↑(breaking formulas between lines)
A formula will be broken only after a relation symbol like $=$
or@$<$ or $\to$, or after a binary operation symbol like $+$ or $-$ or
$\times$, where the relation or binary operation is on the ``outer level'' of
the formula (i.e., not enclosed in |{...}| and not part of an `|\over|'
construction). For example, if you type
\begintt
$f(x,y) = x↑2-y↑2 = (x+y)(x-y)$
\endtt
in mid-paragraph, there's a chance that \TeX\ will break after either of the
|=|@signs (it prefers this) or after the@|-| or@|+| or@|-| (in an emergency).
But there won't be a break after the comma in any case---commas
after which breaks are desirable shouldn't appear between |$|'s.
If you don't want to permit breaking in this example except after the
|=|@signs, you could type
\begintt
$f(x,y) = {x↑2-y↑2} = {(x+y)(x-y)}$
\endtt
because these additional braces ``freeze'' the ↑{subformulas}, putting them
into unbreakable boxes in which the glue has been set to its natural width.
But it isn't necessary to bother worrying about such things unless \TeX\
actually does break a formula badly, since the chances of this are
pretty slim.
\danger A ``↑{discretionary multiplication sign}'' is allowed in formulas:
If you type `|$(x+y)\*(x-y)$|', \TeX\ will treat the ↑{:*} something like
the way it treats \hbox{|\-|}; namely, a line break will be allowed at
that place, with the hyphenation penalty. However, instead of inserting a
hyphen, \TeX\ will insert a $\times$ sign in text size.
\danger If you do want to permit a break at some point in the outer level
of a formula, you can say ↑{:allowbreak}. For example, if the formula
\begintt
$(x_1,\ldots,x_m,\allowbreak y_1,\ldots,y_n)$
\endtt
appears in the text of a paragraph, \TeX\ will allow it to be broken into the
two pieces `$(x_1,\ldots,x_m,$' and `$y_1,\ldots,y_n)$'.
\ddanger The penalty for breaking after a Rel atom is called ↑{*relpenalty},
and the penalty for breaking after a Bin atom is called ↑{*binoppenalty}.
Plain \TeX\ sets |\relpenalty=500| and |\binoppenalty=700|. You can change
the penalty for breaking in any particular case by typing `↑{*penalty}\<number>'
immediately after the atom in question; then the number you have specified
will be used instead of the ordinary penalty. For example, you can prohibit
breaking in the formula `$x=0$' by typing `|$x=\nobreak0$|', since
↑{:nobreak} is an abbreviation for `|\penalty10000|'.
\ddangerexercise Is there any difference between the results of
`|$x=\nobreak0$|' and `|${x=0}$|'?
\answer Neither formula will be broken between lines, but the thick spaces
in the second formula will be set to their natural width while the thick
spaces in the first formula will retain their stretchability.
\ddangerexercise How could you prohibit all breaks in formulas, by making only
a few changes to the macros of plain \TeX?
\answer Set ↑{*relpenalty}|=10000| and ↑{*binoppenalty}|=10000|.
And you also need to change the definitions of\/ ↑{:bmod} and ↑{:pmod},
which insert their own penalties.
\subsection Braces. A variety of different notations have sprung up involving
the symbols `$\{$' and `$\}$', and plain \TeX\ includes several control
sequences that help you cope with formulas involving such things.
↑(braces) ↑(leftbrace) ↑(rightbrace)
In simple situations, braces are used to indicate a ↑{set} of objects;
for example, `$\{a,b,c\}$' stands for the set of three objects $a$, $b$,
and@$c$. There's nothing special about typesetting such formulas, except
that you must remember to use |\{| and |\}| for the braces:
↑(:leftbrace) ↑(:rightbrace)
\beginmathdemo
|$\{a,b,c\}$|&\{a,b,c\}\cr
|$\{1,2,\ldots,n\}$|&\{1,2,\ldots,n\}\cr
|$\{\rm red,white,blue\}$|&\{\rm red,white,blue\}\cr
\endmathdemo
A slightly more complex case arises when a set is indicated by giving a
generic element followed by a specific condition; for example, `$\{\,x\mid
x>5\,\}$' stands for the set of all objects $x$ that are greater than@5.
In such situations the control sequence ↑{:mid} should be used for the
↑{vertical bar}, and thin spaces should be inserted inside the braces:
\beginmathdemo
|$\{\,x\mid x>5\,\}$|&\{\,x\mid x>5\,\}\cr
|$\{\,x:x>5\,\}$|&\{\,x:x>5\,\}\cr
\endmathdemo
(Some authors prefer to use a colon instead of `$\mid$', as in the second
example here.) \ When the delimiters get larger, as in
$$\display\bigl\{\,\bigl(x,f(x)\bigr)\bigm\vert x\in D\,\bigr\}$$
they should be called ↑{:bigl}, ↑{:bigm}, and@↑{:bigr}; for example,
the formula just given would be typed
\begintt
\bigl\{\,\bigl(x,f(x)\bigr)\bigm|char`||x\in D\,\bigr\}
\endtt
and formulas that involve still larger delimiters would use ↑{:Big} or
↑{:bigg} or@even ↑{:Bigg}, as explained in Chapter@17.
\exercise How would you typeset the formula
$\bigl\{\,x↑3\bigm\vert h(x)\in\{-1,0,+1\}\,\bigr\}$?
\answer |$\bigl\{\,x↑3\bigm|\vrt|h(x)\in\{-1,0,+1\}\,\bigr\}$|.
\dangerexercise Sometimes the condition that defines a set is given as
a fairly long English description, not as a formula; for example, consider
`$\{\,p\mid p$@and $p+2$ are prime$\,\}$'. This can be handled conveniently
with ↑{*hbox}:
\begintt
$\{\,p\mid\hbox{$p$ and $p+2$ are prime}\,\}$
\endtt
but a long formula like this is troublesome in a paragraph, since an hbox
cannot be broken between lines, and since the glue inside the |\hbox| does
not vary with the interword glue in the line that contains it. Explain how
the given formula could be typeset with line breaks allowed. [{\sl Hint:\/}
Go back and forth between math ↑{mode} and horizontal mode.]
\answer |$\{\,p\mid p$@and $p+2$ are prime$\,\}$|, assuming that
↑{*mathsurround} is zero. The more difficult alternative
`|$\{\,p\mid p\ {\rm and}\ p+2\rm\ are\ prime\,\}$|' is not a solution,
because line breaks do not occur at |\|\] ↑(*space)
(or at glue of any kind) within math formulas. Of course it may be best to
display a formula like this, instead of to break it between lines.
Displayed formulas often involve another sort of brace, to indicate a choice
between various alternatives, as in the construction
$$\display
\vert x\vert=\cases{x,&if $x\ge0$;\cr -x,&otherwise.\cr}$$
↑(selection, see cases) ↑(alternatives, see cases) ↑(choices, see cases)
You can typeset it with the control sequence ↑{:cases}:
\begintt
$$|char`||x|vrt=\cases{x,&if $x\ge0$;\cr
-x,&otherwise.\cr}$$
\endtt
Look closely at this example and notice that it uses the character |&|,
↑(ampersand) which we said in Chapter@7 was reserved for special purposes.
Here for the first time in this manual we have an example of why |&|@is
so special: Each of the cases has two parts, and the@|&| separates those
parts. To the left of the@|&| is a math formula that is implicitly
enclosed in |$...$|; to the right of the@|&| is ordinary text, which is
{\sl not\/} implicitly enclosed in |$...$|. For example, the `|-x,|' in
the second line will be typeset in math mode, but the `|otherwise|' will
be typeset in horizontal mode. Blank spaces after the@|&| are ignored.
There can be any number of cases, but there usually are at least two.
Each case should be followed by ↑{*cr}. Notice that the |\cases| construction
typesets its own `$\{$'; there is no corresponding `$\}$'.
\exercise Typeset the display \lower12pt\null\
$\smash{\displaystyle
f(x)=\cases{1/3&if $0\le x\le1$;\cr 2/3&if $3\le x\le4$;\cr 0&elsewhere.\cr}
}$
\answer |$$f(x)=\cases{1/3&if $0\le x\le1$;\cr 2/3&if $3\le x\le4$;\cr|\hfil
\break|0&elsewhere.\cr}$$|
\danger You can insert `↑{*noalign}$\langle$vertical mode
material$\rangle$' just after any |\cr| within |\cases|, as explained in
Chapter@22, because |\cases| is an application of the general alignment
constructions considered in that chapter. For example, the command
`|\noalign{\vskip2pt}|' can be used to put a little extra space between
two of the cases.
\danger ↑{Horizontal braces} will be set over or under parts of a displayed
formula if you use the control sequences ↑{:overbrace} or ↑{:underbrace}.
Such constructions are considered to be large operators like |\sum|, so you
can put limits above them or below them by specifying superscripts or
subscripts, as in the following examples:
\beginlongdisplaymathdemo
\noalign{\vskip9pt}
|$$\overbrace{x+\cdots+x}↑{k\rm\;times}$$|&
\overbrace{x+\cdots+x}↑{k\rm\;times}\cr
\noalign{\vskip-6pt}
|$$\underbrace{x+y+z}_{>\,0}.$$|&
\underbrace{x+y+z}_{>\,0}.\cr
\endmathdemo
\subsection Matrices. Now comes the fun part. Mathematicians in many different
disciplines like to construct rectangular arrays of formulas that have been
arranged in rows and columns; such an ↑{array} is called a {\sl↑{matrix}}.
Plain \TeX\ provides a ↑{:matrix} control sequence that makes it convenient
to deal with the most common types of matrices.
For example, suppose that you want to specify the display
$$A=\left(\matrix{x-\lambda&1&0\cr
0&x-\lambda&1\cr
0&0&x-\lambda\cr}\right).$$
All you do is type
\begintt
$$A=\left(\matrix{x-\lambda&1&0\cr
0&x-\lambda&1\cr
0&0&x-\lambda\cr}\right).$$
\endtt
↑(:lambda)
This is very much like the |\cases| construction we looked at earlier;
each row of the matrix is followed by@|\cr|, and `|&|'@signs are used
between the individual entries of each row. Notice, however, that you are
supposed to put your own |\left| and |\right| delimiters around the matrix;
this makes |\matrix| different from |\cases|, which inserts a big `$\{$'
automatically. The reason is that |\cases| always involves a left brace,
but different delimiters are used in different matrix constructions. On
the other hand, parentheses are used more often than other delimiters, so
you can write ↑{:pmatrix} if you want plain \TeX\ to fill in the
parentheses for you; the example above then reduces to
\begintt
$$A=\pmatrix{x-\lambda&...&x-\lambda\cr}.$$
\endtt
\exercise Typeset the display \ \lower12pt\null
$\smash{\displaystyle
\left\lgroup\matrix{a&b&c\cr d&e&f\cr}\right\rgroup
\left\lgroup\matrix{u&x\cr v&y\cr w&z\cr}\right\rgroup
},$ \
using ↑{:lgroup} and ↑{:rgroup}.
\answer |$$\left\lgroup\matrix{a&b&c\cr d&e&f\cr}\right\rgroup|\hfil\break
|\left\lgroup\matrix{u&x\cr v&y\cr w&z\cr}\right\rgroup$$|.
\danger The individual entries of a matrix are normally centered in columns.
Each column is made as wide as necessary to accommodate the entries it
contains, and there's a ↑{quad} of space between columns.
If you want to put something ↑{flush right} in its column, precede it
by ↑{*hfill}; if you want to put something ↑{flush left} in its column,
follow it by@↑{*hfill}.
\danger Each entry of a matrix is treated separately from the others,
and it is typeset as a math formula in text style. Thus, for example,
if you say |\rm| in one entry, it does not affect the others.
Don't try to say `|{\rm x&y}|'.
Matrices often appear in the form of generic patterns that use ↑{ellipses}
(i.e., dots) to indicate rows or columns that are left out. You can typeset
such matrices by putting the ellipses into rows and/or columns of their own.
Plain \TeX\ provides ↑{:vdots} (vertical dots) and ↑{:ddots} (diagonal dots)
as companions to ↑{:ldots} for constructions like this. For example, the
↑{generic matrix}
$$A=\pmatrix{a_{11}&a_{12}&\ldots&a_{1n}\cr
a_{21}&a_{22}&\ldots&a_{2n}\cr
\vdots&\vdots&\ddots&\vdots\cr
a_{m1}&a_{m2}&\ldots&a_{mn}\cr}$$
is easily specified:
\begintt
$$A=\pmatrix{a_{11}&a_{12}&\ldots&a_{1n}\cr
a_{21}&a_{22}&\ldots&a_{2n}\cr
\vdots&\vdots&\ddots&\vdots\cr
a_{m1}&a_{m2}&\ldots&a_{mn}\cr}$$
\endtt
\medskip
\exercise How can you get \TeX\ to produce the ↑{column vector} ↑(vector)
\lower18pt\null\ $\smash{\displaystyle
\pmatrix{y_1\cr \vdots\cr y_k\cr}
}$\quad?
\answer |\pmatrix{y_1\cr \vdots\cr y_k\cr}|.
\danger Sometimes a matrix is bordered at the top and left by formulas
that give labels to the rows and columns. Plain \TeX\ provides a special
macro called ↑{:bordermatrix} for this situation. For example, the display
$$\tenmath
M=\bordermatrix{&C&I&C'\cr C&1&0&0\cr I&b&1-b&0\cr C'&0&a&1-a\cr}$$
is obtained when you type
\begintt
$$M=\bordermatrix{&C&I&C'\cr
C&1&0&0\cr I&b&1-b&0\cr C'&0&a&1-a\cr}$$
\endtt
The first row gives the upper labels, which appear above the big left
and right parentheses; the first column gives the left labels, which are
typeset flush left, just before the matrix itself. The first column in
the first row is normally blank. Notice that |\bordermatrix| inserts
its own parentheses, like |\pmatrix| does.
\danger It's usually inadvisable to put matrices into the text of a paragraph,
because they are so big that they are better displayed. But occasionally
you may want to specify a small matrix like $1\,1\choose0\,1$, which you can
↑(:choose) ↑(matrix, small)
typeset for example as `|$1\,1\choose0\,1$|'. Similarly, the small matrix
$\bigl({a\atop l}{b\atop m}{c\atop n}\bigr)$ can be typeset as
\begintt
$\bigl({a\atop l}{b\atop m}{c\atop n}\bigr)$
\endtt
↑(*atop) The |\matrix| macro does not produce small arrays of this sort.
\subsection Vertical spacing. If you want to tidy up an unusual formula,
you know already how to move things farther apart or closer together, by
using positive or negative thin spaces. But such spaces affect only the
horizontal dimension; what if you want something to be moved higher
or lower? That's an advanced topic.
\danger Appendix B provides a few macros that can be used to fool \TeX\
into thinking that certain formulas are larger or smaller than they really
are; such tricks can be used to move other parts of the formula up or down
or left or right. For example, we have already discussed the use of
↑{:mathstrut} in Chapter@16 and ↑{:strut} in Chapter@17; these invisible
boxes caused \TeX\ to put square root signs and the denominators of
continued fractions into different positions than usual.
\danger If you say `↑{:phantom}|{|\<subformula>|}|' in any formula, \TeX\
will do all of its spacing as if you had said simply
`|{|\<subformula>|}|', but the subformula itself will be invisible. Thus,
for example, `|\phantom{0}2|' takes up just as much space as `|02|' in the
current style, but only the@|2| will actually appear on the page. If you
want to leave blank space for a ↑{new symbol} that has exactly the same
size as $\sum$, but if you are forced to put that symbol in by hand for
some reason, `|\mathop{\phantom\sum}|' will leave exactly the right amount
of blank space. \ (The `↑{*mathop}' here makes this phantom behave like
|\sum|, i.e., as a large operator.)
\danger Even more useful than |\phantom| is ↑{:vphantom}, which makes
an invisible box whose height and depth are the same as those of
the corresponding |\phantom|, but
the width is zero. Thus, |\vphantom| makes a vertical ↑{strut} that can
increase a formula's effective height or depth. Plain \TeX\ defines
|\mathstrut| to be an abbreviation for `|\vphantom(|'. There's also
↑{:hphantom}, which has the width of a |\phantom|, but its height
and depth are zero.
\danger Plain \TeX\ also provides `↑{:smash}|{|\<subformula>|}|', a macro
that yields the same result as `|{|\<subformula>|}|' but makes the height and
depth zero. By using both |\smash| and |\vphantom| you can typeset any
subformula and give it any desired nonnegative height and depth. For example,
\begintt
\mathop{\smash\limsup\vphantom\liminf}
\endtt
produces a large operator that says `$\limsup$', but its height and depth
are those of\/ ↑{:liminf} (i.e., the depth is zero). ↑(:limsup)
\def\undertext#1{$\underline{\hbox{#1}}$}
\ddangerexercise If you want to underline some text, you could use a macro like
\begintt
\def\undertext#1{$\underline{\hbox{#1}}$}
\endtt
to do the job. \undertext{But} \undertext{this} \undertext{doesn't}
\undertext{always} \undertext{work} \undertext{right}. Discuss better
alternatives. ↑(underlined text)
\answer |\def|\stretch|\undertext|\stretch|#1{$\underline|\stretch
|{\smash|\stretch|{\hbox|\stretch|{#1}}}$}| will underline the
\def\undertext#1{$\underline{\smash{\hbox{#1}}}$}%
words and cross \undertext{through} the descenders; or you could insert
|\vphantom{y}| before the |\hbox|, thereby lowering all of the underlines
to a position below all descenders. Neither of these gives exactly what is
wanted. \ (See also ↑{:underbox} in Appendix@B\null.) \ Underlining is actually
not very common in fine typography, since font changes usually work just
as well or better, when you want to emphasize something. If you really want
underlined text, the best solution is to have a special font in which all
the letters are underlined.
\ddanger You can also use ↑{*raise} and ↑{*lower} to adjust the vertical
positions of boxes in formulas. For example, the formula
`|$2↑{\raise1pt\hbox{$\scriptstyle n$}}$|' will have its superscript@$n$
one point higher than usual ($2↑{\raise1pt\hbox{$\scriptstyle n$}}$ instead of
$2↑n$). Note that it was necessary to say ↑{*scriptstyle} in this example,
since the contents of an ↑{*hbox} will normally be in text style even when
that hbox appears in a superscript, and since |\raise| can be used only in
connection with a box. This method of positioning is not used extremely
often, but it is sometimes helpful if the ↑{:root} macro doesn't put its
argument in a suitable place. For example,
$$\displaybox{|\root\raise|\<dimen>|\hbox{$\scriptscriptstyle|\<argument>%
|$}\of...|}$$
will move the argument up by a given amount.
\ddanger Instead of changing the sizes of subformulas, or using |\raise|,
you can also control vertical spacing by changing the parameters
that \TeX\ uses when it is converting math lists to horizontal lists.
These parameters are described in Appendix@G\null; you need to be careful when
changing them, because such changes are ↑{global} (i.e., not local to groups).
Here is an example of how such a change might be made: Suppose that you
are designing a format for ↑{chemical typesetting}, and that you expect to be
setting a lot of formulas like `$\rm Fe_2↑{+2}Cr_2O_4$'. You may not like the
fact that the subscript in@$\rm Fe_2↑{+2}$ is lower than the subscript
in@$\rm Cr_2$; and you don't want to force users to type monstrosities like
\begintt
$\rm Fe_2↑{+2}Cr_2↑{\vphantom{+2}}O_4↑{\vphantom{+2}}$
\endtt
just to get the formula
$\rm Fe_2↑{+2}Cr_2↑{\vphantom{+2}}O_4↑{\vphantom{+2}}$
with all subscripts at the same level. Well, all you need to do is set
`↑{*texinfo}|\tensy16=2.7pt|' and `|\texinfo\tensy17=2.7pt|', assuming
that ↑{:tensy} is your main symbol font (|\textfont2|); this lowers all
normal ↑{subscripts} to a position $2.7\pt$ below the baseline, which is
enough to make room for a possible superscript that contains a plus sign.
Similarly, you can adjust the positioning of ↑{superscripts} by changing
|\texinfo\tensy14|. There are parameters for the position of the ↑{axis line},
the positions of ↑{numerator} and ↑{denominator} in a generalized ↑{fraction},
the spacing above and below ↑{limits}, the default ↑{rule thickness}, and so
on. Appendix@G gives precise details.
\subsection Special features for math hackers. \TeX\ has a few more primitive
operations for math mode that haven't been mentioned yet. They are
occasionally useful if you are designing special formats.
\ddanger If a glue or kern specification is immediately preceded by
`↑{*nonscript}', \TeX\ will not use that glue or kern in
script or scriptscript styles. Thus, for example, the sequence
`|\nonscript\;|' produces exactly the amount of space specified by
`{\tt(3)}' in the spacing table for mathematics that appeared earlier
in this chapter.
\ddanger Whenever \TeX\ has scanned a |$| and is about to read a math formula
that appears in text, it will first read another list of tokens that has
been predefined by the command ↑{*everymath}|{|\<token list>|}|. \ (This is
analogous to |\everypar|, which was described in Chapter@14.) \ Similarly,
you can say ↑{*everydisplay}|{|\<token list>|}| to predefine a list of tokens
for \TeX\ to read just after it has scanned an opening |$$|, i.e., just
before reading a formula that is to be displayed. With |\everymath| and
|\everydisplay|, you can set up special conventions that you wish to apply
to all formulas.
\subsection Summary. We have discussed more different kinds of formulas in
this chapter than you will usually find in any one book of mathematics.
If you have faithfully done the exercises so far, you can face almost
any formula with confidence.
\danger But here are a few more exercises, to help you review what you
have learned. Each of the following ``challenge formulas'' illustrates one
or more of the principles already discussed in this chapter. The author
confesses that he is trying to trip you@up on several of these.
Nevertheless, if you try each one before looking at the answer, and if
you're alert for traps, you should find that these formulas provide a good
way to consolidate and complete your knowledge.
\challenge Explain how to type the phrase `$n↑{\rm th}$ root', where
`$n↑{\rm th}$' is treated as a mathematical formula with a superscript in
roman type.
\answer |$n↑{\rm th}$ root|. \ (Incidentally, it is also acceptable
to type `|$n$th|', getting `$n$th', in such situations; the fact that
the $n$ is in italics distinguishes it from the suffix. Typed manuscripts
generally render this with a hyphen, but `$n$-th' is frowned on nowadays
when an italic@$n$ is available.) ↑(nth)
\challenge $\qquad\tenmath{\bf S↑{\rm-1}TS=dg}(\omega_1,\ldots,\omega_n)
=\bf\Lambda$.
\answer |${\bf S↑{\rm-1}TS=dg}(\omega_1,|\stretch|\ldots,|\stretch|\omega_n)
=\bf\Lambda$|.
\ $\bigl($Did you notice the difference between ↑{:omega} ($\omega$)
and@|w| ($w$)?$\bigr)$
\challenge $\qquad\tenmath\Pr(\,m=n\mid m+n=3\,)$.
\answer |$\Pr(\,m=n\mid m+n=3\,)$|. \ (Analogous to a set.) ↑(:Pr)
\challenge $\qquad\tenmath\sin18↑\circ={1\over4}(\sqrt5-1)$.↑(degrees)
\answer |$\sin18↑\circ={1\over4}(\sqrt5-1)$|. ↑(:circ)
\challenge $\qquad\tenmath k=1.38\times10↑{-23}\rm\,erg/↑\circ K$.
\answer |$k=1.38\times10↑{-23}\rm\,erg/↑\circ K$|.
\challenge $\qquad\tenmath \bar\Phi\subset NL_1↑*/N=\skew1\bar L_1↑*
\subseteq\cdots\subseteq NL_n↑*/N=\skew1\bar L_n↑*$.
\answer |$\bar\Phi\subset NL_1↑*/N=\skew1\bar L_1↑*|\par
| \subseteq\cdots\subseteq NL_n↑*/N=\skew1\bar L_n↑*$|.
\challenge $\qquad\tenmath I(\lambda)=\int\!\!\int_Dg(x,y)e↑{i\lambda h(x,y)}
\,dx\,dy$. % cf. Math. Comp. 37 (1981), 509
\answer |$I(\lambda)=\int\!\!\int_Dg(x,y)e↑{i\lambda h(x,y)}\,dx\,dy$|.\hfil
\break
(Although three |\!|'s work out best between consecutive integral signs in
displays, the text style seems to want only two.) ↑(double integral)
↑(integral, multiple)
\challenge $\qquad\tenmath
\int_0↑1\!\cdots\int_0↑1f(x_1,\ldots,x_n)\,dx_1\ldots\,dx_n$.
\answer |$\int_0↑1\!\cdots\int_0↑1f(x_1,\ldots,x_n)\,dx_1\ldots\,dx_n$|.
\challenge Here's a display.
$$\tenmath x_{2m}\equiv\cases{Q(X_m↑2-P_2W_m↑2)-2S↑2&($m$ odd)\cr
\noalign{\vskip2pt}
P_2↑2(X_m↑2-P_2W_m↑2)-2S↑2&($m$ even)\cr}\pmod N.$$
\answer |$$x_{2m}\equiv\cases{Q(X_m↑2-P_2W_m↑2)-2S↑2&($m$ odd)\cr|\par
| \noalign{\vskip2pt} % spread the lines apart a little|\par
| P_2↑2(X_m↑2-P_2W_m↑2)-2S↑2&($m$ even)\cr}\pmod N.$$|
\challenge And another. % ACP Eq. 1.2.9--33
$$\tenmath (1+x_1z+x_1↑2z↑2+\cdots\,)\ldots(1+x_nz+x_n↑2z↑2+\cdots\,)
={1\over(1-x_1z)\ldots(1-x_nz)}.$$
\answer |$$(1+x_1z+x_1↑2z↑2+\cdots\,)\ldots(1+x_nz+x_n↑2z↑2+\cdots\,)|\par
| ={1\over(1-x_1z)\ldots(1-x_nz)}.$$| \ (Notice the uses of\/ |\,|.)
\challenge And another. % Eq. 1.2.9--9
$$\tenmath \prod_{j\ge0}\biggl(\sum_{k\ge0}a_{jk}z↑k\biggr)
=\sum_{n\ge0}z↑n\,\Biggl(\sum_
{\scriptstyle k_0,k_1,\ldots\ge0\atop
\scriptstyle k_0+k_1+\cdots=n}
a_{0k_0}a_{1k_1}\ldots\,\Biggr).$$
\answer |$$\prod_{j\ge0}\biggl(\sum_{k\ge0}a_{jk}z↑k\biggr)|\par
| =\sum_{n\ge0}z↑n\,\Biggl(\sum_|\par
| {\scriptstyle k_0,k_1,\ldots\ge0\atop|\par
| \scriptstyle k_0+k_1+\cdots=n}|\par
| a_{0k_0}a_{1k_1}\ldots\,\Biggr).$$|
\challenge And, % cf ACP vol1 p64
$$\tenmath {(n_1+n_2+\cdots+n_m)!\over n_1!\,n_2!\ldots n_m!}
={n_1+n_2\choose n_2}{n_1+n_2+n_3\choose n_3}
\ldots{n_1+n_2+\cdots+n_m\choose n_m}.$$
\answer |$${(n_1+n_2+\cdots+n_m)!\over n_1!\,n_2!\ldots n_m!}|\par
| ={n_1+n_2\choose n_2}{n_1+n_2+n_3\choose n_3}|\par
| \ldots{n_1+n_2+\cdots+n_m\choose n_m}.$$|
\challenge Yet another display. % found in Chaundy et al
$$\tenmath \def\\#1#2{(1-q↑{#1_#2+n})} % to save typing
\Pi_R{a_1,a_2,\ldots,a_M\atopwithdelims[]b_1,b_2,\ldots,b_N}
=\prod_{n=0}↑R{\\a1\\a2\ldots\\aM\over\\b1\\b2\ldots\\bN}.$$
\answer |$$\def\\#1#2{(1-q↑{#1_#2+n})} % to save typing|\par
|\Pi_R{a_1,a_2,\ldots,a_M\atopwithdelims[]b_1,b_2,\ldots,b_N}|\par
| =\prod_{n=0}↑R{\\a1\\a2\ldots\\aM\over\\b1\\b2\ldots\\bN}.$$|
↑(*atopwithdelims)
\challenge And another.
$$\tenmath \sum_{p\rm\;prime}f(p)=\int_{t>1}f(t)\,d\pi(t).$$
\answer |$$\sum_{p\rm\;prime}f(p)=\int_{t>1}f(t)\,d\pi(t).$$|
\challenge Still another.
$$\tenmath \{\underbrace{\overbrace{\mathstrut a,\ldots,a}
↑{k\;a\mathchar`'\rm s},
\overbrace{\mathstrut b,\ldots,b}
↑{l\;b\mathchar`'\rm s}}_{k+l\rm\;elements}\}.$$
\answer |$$\{\underbrace{\overbrace{\mathstrut a,\ldots,a}|\par
| ↑{k\;a\mathchar`'\rm s},|\par
| \overbrace{\mathstrut b,\ldots,b}|\par
| ↑{l\;b\mathchar`'\rm s}}_{k+l\rm\;elements}\}.$$|\par
\smallskip\noindent Notice how ↑{apostrophes} (instead of primes) were obtained.
\challenge Put a ↑{:smallskip} between the rows of matrices in the
compound matrix ↑(compound matrix)
$$\tenmath \pmatrix{\pmatrix{a&b\cr c&d\cr}&
\pmatrix{e&f\cr g&h\cr}\cr
\noalign{\smallskip}
0&\pmatrix{i&j\cr k&l\cr}\cr}.$$
\answer |$$\pmatrix{\pmatrix{a&b\cr c&d\cr}&|\par
| \pmatrix{e&f\cr g&h\cr}\cr|\par
| \noalign{\smallskip}|\par
| 0&\pmatrix{i&j\cr k&l\cr}\cr}.$$|
\challenge Make the columns ↑{flush left} here. % cf Polya/Szego VII.43.2
$$\tenmath \det\left\vert\,\matrix{
c_0&c_1\hfill&c_2\hfill&\ldots&c_n\hfill\cr
c_1&c_2\hfill&c_3\hfill&\ldots&c_{n+1}\hfill\cr
c_2&c_3\hfill&c_4\hfill&\ldots&c_{n+2}\hfill\cr
\,\vdots\hfill&\,\vdots\hfill&
\,\vdots\hfill&&\,\vdots\hfill\cr
c_n&c_{n+1}\hfill&c_{n+2}\hfill&\ldots&c_{2n}\hfill\cr
}\right\vert>0.$$
\answer |$$\det\left|\vrt|\,\matrix{|\par
| c_0&c_1\hfill&c_2\hfill&\ldots&c_n\hfill\cr|\par
| c_1&c_2\hfill&c_3\hfill&\ldots&c_{n+1}\hfill\cr|\par
| c_2&c_3\hfill&c_4\hfill&\ldots&c_{n+2}\hfill\cr|\par
| \,\vdots\hfill&\,\vdots\hfill&|\par
| \,\vdots\hfill&&\,\vdots\hfill\cr|\par
| c_n&c_{n+1}\hfill&c_{n+2}\hfill&\ldots&c_{2n}\hfill\cr|\par
| }\right|\vrt|>0.$$|
\cchallenge The main problem here is to prime the $\sum$. ↑(:sum prime) ↑(=def)
$$\tenmath \mathop{{\sum}'}_{x\in A}f(x)\mathrel{\mathop=↑{\rm def}}
\sum_{\scriptstyle x\in A\atop\scriptstyle x\ne0}f(x).$$
\answer |$$\mathop{{\sum}'}_{x\in A}f(x)\mathrel{\mathop=↑{\rm def}}|\par
| \sum_{\scriptstyle x\in A\atop\scriptstyle x\ne0}f(x).$$|\par
\smallskip\noindent
This works because |{\sum}| is type Ord (so its superscript is not set
above), but ↑{*mathop}|{{\sum}'}| is type Op (so its subscript is set below).
\cchallenge You may be ready now for this display.
$$\tenmath 2\uparrow\uparrow k\mathrel{\mathop=↑{\rm def}}
2↑{2↑{2↑{\cdot↑{\cdot↑{\cdot↑2}}}}}
\vbox{\hbox{$\Big\}\scriptstyle k$}\kern0pt}.$$
\answer |$$2\uparrow\uparrow k\mathrel{\mathop=↑{\rm def}}|\par
| 2↑{2↑{2↑{\cdot↑{\cdot↑{\cdot↑2}}}}}|\par
| \vbox{\hbox{$\Big\}\scriptstyle k$}\kern0pt}.$$|\par
\cchallenge And finally, when you have polished off all the other examples,
here's the ultimate test. Explain how to obtain the ↑{commutative diagram}
% from Invent. Math. 70 (1982), 34
$$\tenmath\def\normalbaselines{\baselineskip20pt\lineskip1pt\lineskiplimit0pt }
\def\mapright#1{\smash{
\mathop{\longrightarrow}\limits↑{#1}}}
\def\mapdown#1{\Big\downarrow
\rlap{$\vcenter{\hbox{$\scriptstyle#1$}}$}}
\matrix{\noalign{\vskip6pt}&&&&&&0\cr
&&&&&&\mapdown{}\cr
0&\mapright{}&{\cal O}_C&\mapright\iota&
\cal E&\mapright\rho&\cal L&\mapright{}&0\cr
&&\Big\Vert&&\mapdown\phi&&\mapdown\psi\cr
0&\mapright{}&{\cal O}_C&\mapright{}&
\pi_*{\cal O}_D&\mapright\delta&
R↑1f_*{\cal O}_V(-D)&\mapright{}&0\cr
&&&&&&\mapdown{\theta_i\otimes\gamma↑{-1}}\cr
&&&&&&\hidewidth R↑1f_*\bigl({\cal O}
_V(-iM)\bigr)\otimes\gamma↑{-1}\hidewidth\cr
&&&&&&\mapdown{}\cr
&&&&&&0\cr\noalign{\vskip6pt}}$$
using ↑{:matrix}. \ (Many of the entries are blank.)
\answer If you have to do a lot of commutative diagrams, you will want to
define some macros like those in the first few lines of this solution.
The |\matrix| macro resets the baselines to ↑{:normalbaselines}, because
other commands like |\openup| might have changed them, so
we redefine \hbox{|\normalbaselines|} in this solution.
\smallskip
|$$\def\normalbaselines{\baselineskip20pt|\par
| \lineskip3pt \lineskiplimit3pt }|\par
|\def\mapright#1{\smash{|\par
| \mathop{\longrightarrow}\limits↑{#1}}}|\par
|\def\mapdown#1{\Big\downarrow|\par
| \rlap{$\vcenter{\hbox{$\scriptstyle#1$}}$}}|\par
|\matrix{&&&&&&0\cr|\par
| &&&&&&\mapdown{}\cr|\par
| 0&\mapright{}&{\cal O}_C&\mapright\iota&|\par
| \cal E&\mapright\rho&\cal L&\mapright{}&0\cr|\par
| &&\Big\Vert&&\mapdown\phi&&\mapdown\psi\cr|\par
| 0&\mapright{}&{\cal O}_C&\mapright{}&|\par
| \pi_*{\cal O}_D&\mapright\delta&|\par
| R↑1f_*{\cal O}_V(-D)&\mapright{}&0\cr|\par
| &&&&&&\mapdown{\theta_i\otimes\gamma↑{-1}}\cr|\par
| &&&&&&\hidewidth R↑1f_*\bigl({\cal O}|\par
| _V(-iM)\bigr)\otimes\gamma↑{-1}\hidewidth\cr|\par
| &&&&&&\mapdown{}\cr|\par
| &&&&&&0\cr}$$|\par
\subsection Words of advice. The number of different notations is
enormous and still growing, so you will probably continue to find
new challenges as you continue to type mathematical papers. It's a
good idea to keep a personal notebook in which you record all of
the non-obvious formulas that you have handled successfully,
showing both the final output and what you typed to get it.
Then you'll be able to refer back to those solutions when you
discover that you need to do something similar, a few months later.
If you're a mathematician who types your own papers, you have now learned
how to get enormously complex formulas into print, and you can do so
without going through an intermediary who may somehow distort their
meaning. But please, don't get too carried away by your newfound talent;
the fact that you are able to typeset your formulas with \TeX\ doesn't
necessarily mean that you have found the best notation for communicating
with the readers of your work. Some notations will be unfortunate even
when they are beautifully formatted.
\endchapter
Mathematicians are like Frenchmen:\/
% Die Mathematiker sind eine Art Franzosen:
whenever you say something to them, they translate it into their own language,
% redet man zu ihnen, so u"bersetzen sie es in ihre Sprache,
and at once it is something entirely different.
% und dann ist es alsobald ganz etwas Anders.
\author ↑{GOETHE}, {\sl Maxims and Reflexions\/} (1829)
% see Schriften der Goethe-Gesellschaft, vol 21, pp 266 and 389
\bigskip
The best notation is no notation;
whenever it is possible to avoid
the use of a complicated alphabetic apparatus,
avoid it.
A good attitude to the preparation of written mathematical exposition
is to pretend that it is spoken.
Pretend that you are explaining the subject to a friend
on a long walk in the woods, with no paper available;
fall back on symbolism only when it is really necessary.
\author PAUL ↑{HALMOS}, {\sl How to Write Mathematics\/} (1970)
% in {\sl L'Enseignement Math\'ematique\/}
% vol 16, 123--152; section 15; reprinted in AMS pub "How to Write Math"
\eject
\beginchapter Chapter 19. Displayed Equations
By now you know how to type mathematical formulas so that \TeX\ will handle
them with supreme elegance; your knowledge of math typing is nearly complete.
But there is one more part to the story, and the purpose of this chapter
is to present the happy ending. We have discussed how to deal with individual
formulas; but ↑{displays} often involve a whole bunch of different formulas,
or different pieces of a huge formula, and it's a bit of a problem to
lay them out so that they line up properly with each other. Fortunately,
large displays generally fall into a few simple patterns.
\subsection One-line displays. Before plunging into the general question
of display layout, let's recapitulate what we have already covered.
If you type `|$$|\<formula>|$$|', \TeX\ will display the formula ↑(dollardollar)
in flamboyant display style, centering it on a line by itself. We have also
noted in Chapter@18 that it's possible to display two short formulas at once,
by typing `|$$|\<formula$_1$>↑{:qquad}\<formula$_2$>|$$|'; this reduces
the two-formula problem to a one-formula problem. You get the two
formulas separated by two quads of space, the whole being centered on a line.
Displayed equations often involve ordinary text. Chapter@18 explains
how to get roman type into formulas without leaving math mode, but the
best way to get text into a display is to put it into an ↑{*hbox}.
There needn't even be any math at all; to typeset
$$\hbox{Displayed Text}$$
you can simply say `|$$\hbox{Displayed Text}$$|'. But here's a more interesting
example:
$$X_n=X_k \qquad\hbox{if and only if}\qquad
Y_n=Y_k \quad\hbox{and}\quad Z_n=Z_k.$$
Formulas and text were combined in this case by typing
\begintt
$$X_n=X_k \qquad\hbox{if and only if}\qquad
Y_n=Y_k \quad\hbox{and}\quad Z_n=Z_k.$$
\endtt
Notice that |\qquad| appears around `if and only if', but a single ↑{:quad}
surrounds `and'; this helps to indicate that the $Y$ and@$Z$ parts of the
display are related more closely to each other than to the $X$@part.
Consider now the display
$$Y_n=X_n\bmod p \quad\hbox{and}\quad Z_n=X_n\bmod q
\qquad\hbox{for all }n\ge0.$$
Can you figure out how to type this? One solution is
\begintt
$$Y_n=X_n\bmod p \quad\hbox{and}\quad Z_n=X_n\bmod q
\qquad\hbox{for all }n\ge0.$$
\endtt
Notice that a space has been left after `|all|' in the hbox here, since spaces
disappear when they are out in formula-land. But there's a simpler and more
logical way to proceed, once you get used to \TeX's idea of modes: you can type
\begintt
... \qquad\hbox{for all $n\ge0$.}$$
\endtt
Wow---that's math mode inside of horizontal mode inside of display
math mode. But in this way your manuscript mirrors what you are trying to
accomplish, while the previous solution (with the space after `|all|')
looks somewhat forced.
\exercise Typeset the following four displays (one at a time):
$$\openup{3pt}\displaylines{
\sum_{n=0}↑\infty a_nz↑n\qquad\hbox{converges if}\qquad
\vert z\vert<\Bigl(\limsup_{n\to\infty}
\root n\!\of{\vert a_n\vert}\,\Bigr)↑{-1}.\cr
{f(x+\Delta x)-f(x)\over\Delta x}\to f'(x)\qquad\hbox{as $\Delta x\to0$.}\cr
\noalign{\vskip2pt}
\Vert u_i\Vert=1,\qquad u_i\cdot u_j=0\quad\hbox{if $i\ne j$.}\cr
\it\hbox{The confluent image of}\quad\left\{
\matrix{\hbox{an arc}\hfill\cr\hbox{a circle}\hfill\cr
\hbox{a fan}\hfill\cr}
\right\}\quad\hbox{is}\quad\left\{
\matrix{\hbox{an arc}\hfill\cr\hbox{an arc or a circle}\hfill\cr
\hbox{a fan or an arc}\hfill\cr}\right\}.\cr
\noalign{\vskip-8pt}}$$
↑(:Delta) ↑(:Vert)
% the last example comes from Proc AMS 55 (1976), 410, with typos corrected
\answer |$$\sum_{n=0}↑\infty a_nz↑n\qquad\hbox{converges if}\qquad|\par
| |\vrt|z|\vrt|<\Bigl(\limsup_{n\to\infty}\root n\!\of{|\vrt|a_n|\vrt↑(:root)
|}\,\Bigr)↑{-1}.$$|\par
\smallskip
|$${f(x+\Delta x)-f(x)\over\Delta x}\to f'(x)|\par
| \qquad\hbox{as $\Delta x\to0$.}$$|\par
\smallskip
|$$\|\vrt|u_i\|\vrt|=1,\qquad u_i\cdot u_j=0\quad\hbox{if $i\ne j$.}$$|\par
\smallskip
|$$\it\hbox{The confluent image of}\quad\left\{|\par
| \matrix{\hbox{an arc}\hfill\cr\hbox{a circle}\hfill\cr|\par
| \hbox{a fan}\hfill\cr}|\par
| \right\}\quad\hbox{is}\quad\left\{|\par
| \matrix{\hbox{an arc}\hfill\cr|\par
| \hbox{an arc or a circle}\hfill\cr|\par
| \hbox{a fan or an arc}\hfill\cr}\right\}.$$|\par
\smallskip\noindent
The first example includes |\!| and |\,| to give slightly refined spacing;
but the point of the problem was to illustrate the hbox, not to fuss over
such extra details.
The last example can be done much more simply using the ideas of
Chapter@22, if you don't mind descending to the level of \TeX\ primitives;
for example, the first matrix could be replaced by ↑(*halign)
\begintt
\,\vcenter{\halign{\hbox{#\hfil}\cr
an arc\cr a circle\cr a fan\cr}}\,
\endtt
and the second is similar.
\dangerexercise Sometimes display style is too grandiose, when the formula
being displayed is
$$y={1\over2}x$$
or something equally simple. One day B. L. ↑{User} tried to remedy this by
typing it as `|$$y={\scriptstyle1\over\scriptstyle2}x$$|', but the
resulting formula
$$y={\scriptstyle1\over\scriptstyle2}x$$
wasn't at all what he had in mind. What's the right way to get ↑(one half)
↑(1/2--unslashed form) simply `$y={1\over2}x$' when you don't want big
↑{fractions in displays}?
\answer |$$\textstyle y={1\over2}x$$|. \ (Switching to text style is
especially common in multiline formulas. For example, you will probably
find occasions to use ↑{*textstyle} on both sides of the |&|'s within
an ↑{:eqalign}.)
\dangerexercise What difference, if any, is there between the result of
typing `|$$|\<formula>|$$|' and the result of typing
`|$$\hbox{$|\<formula>|$}$$|'\thinspace?
\answer The latter formula will be in text style, not display style.
And even if you do type `|$$\hbox{$\displaystyle{|\<formula>|}$}$$|', the
results are not quite the same, as we will see later: \TeX\ will compress
the glue in `|$$|\<formula>|$$|' if the formula is too wide to fit on
a line at its natural width, but the glue inside |\hbox{...}| is frozen
at its natural width.
\dangerexercise You may have noticed that most of the displays in this
manual are not centered; displayed material is usually aligned at the
left with the paragraph indentation, as part of the book design, because
this is an unusual book. Explain how you could typeset a formula like
$$\leftline{\indent$\displaystyle
1-{1\over2}+{1\over3}-{1\over4}+\cdots=\ln2$}$$
that is off-center in this way.
\answer One solution is to put the formula in an hbox that occupies a full line:
\begintt
$$\leftline{\indent$\displaystyle
1-{1\over2}+{1\over3}-{1\over4}+\cdots=\ln2$}$$
\endtt
But this takes a bit of typing; the author actually used much trickier (but
less robust) macros, which appear in Appendix@E.
If you've had previous experience typing mathematical papers, you probably
have been thinking, ``What about ↑{equation numbers}? When is this book
going to talk about them?'' Ah yes, now is the time to discuss those sneaky
little labels that appear off to the side of displays. If you type
$$\displaybox{|$$|\<formula>|\eqno|\<formula>|$$|}$$
\TeX\ will display the first formula and it will also put an equation number
(the second formula) at the right-hand margin. For example,
\begintt
$$x↑2-y↑2 = (x+y)(x-y).\eqno(15)$$
\endtt
↑(*eqno)
will produce this:
$$x↑2-y↑2 = (x+y)(x-y).\eqno(15)$$
You can also get equation numbers at the left-hand margin, with ↑{*leqno}.
For example,
\begintt
$$x↑2-y↑2 = (x+y)(x-y).\leqno(16)$$
\endtt
will produce this:
$$x↑2-y↑2 = (x+y)(x-y).\leqno(16)$$
Notice that you always give the equation number second, even when it is going
to appear at the left. Everything from the |\eqno| or |\leqno| command to
the |$$| that ends the display is the equation number. Thus, you're not
allowed to have two equation numbers in the same display; but there's a
way to get around that restriction, as we'll see later.
\danger Nowadays people are using right-hand equation numbers more and more,
because a display most often comes at the end of a sentence or clause, and
the right-hand convention keeps the number from intruding into the clause.
Furthermore, it's often possible to save space when a displayed equation
follows a short text line, since less space is needed above the display;
such savings are not possible with |\leqno|, because there's no room for
overlap. For example, there is less space above display@(15) than there is
above@(16) in our illustrations of\/ |\eqno| and |\leqno|, although the
formulas and text are otherwise identical.
\danger If you look closely at (15) and (16) above, you can see that the
displayed formulas have been centered without regard to the presence of
the equation numbers. But when a formula is large, \TeX\ makes sure that
it does not interfere with its number; the equation number may even be
placed on a line by itself.
\exercise How would you produce the following display?
$$\prod_{k\ge0}{1\over(1-q↑kz)}=
\sum_{n\ge0}z↑n\bigg/\prod_{1\le k\le n}(1-q↑k).\eqno(16')$$
\answer |$$\prod_{k\ge0}{1\over(1-q↑kz)}=|\par
| \sum_{n\ge0}z↑n\bigg/\prod_{1\le k\le n}(1-q↑k).\eqno(16')$$|
\dangerexercise Equation numbers are math formulas, typeset in text style.
So how can you get an equation number like `\hbox{(3--1)}'
(with an ↑{en-dash})?
\answer |\eqno\hbox{(3--1)}|.
\ddangerexercise B. L. ↑{User} tried typing `|\eqno(*)|' and `|\eqno(**)|',
and he was pleased to discover that this produced the equation numbers
`$(*)$' and `$(**)$'. \ [He had been a bit worried that they would come out
`(*)' and `(**)' instead.] \ But then a few months later he tried
`|\eqno(***)|' and got a surprise. What was it?
\answer When you type an ↑{asterisk} in math mode, plain \TeX\ considers
|*| to be a binary operation. In the cases `|(*)|' and `|(**)|', the
binary operations are converted to type@Ord, because they don't appear in
a binary context; but the middle asterisk in `|(***)|' remains of type@Bin.
So the result was `$(***)$'. To avoid the extra medium spaces, you can
type `|\eqno(*{*}*)|'; or you can change ↑{*mathcode}|`*|, if you never use
|*| as a binary operation.
\ddanger Somewhere in this manual there ought to be a description of exactly
how \TeX\ displays formulas; i.e., how it centers them, how it places the
equation numbers, how it inserts extra space above and below, and so on.
Well, now is the time for those rules to be stated. They are somewhat
complex, because they interact with things like |\parshape|, and because
they involve several parameters that haven't been discussed yet. The purpose
of the rules is to explain exactly what sorts of boxes, glue, and
penalties are placed onto the current ↑{vertical list} when a display occurs.
\ddanger If a display occurs after, say, four lines of a paragraph, \TeX's
internal register called ↑{*prevgraf} will be equal to@4 when the display
starts. The display will be assumed to take three lines, so |\prevgraf|
will become@7 when the paragraph is resumed at the end of the display
(unless you have changed |\prevgraf| in the meantime). \TeX\ assigns
special values to three \<dimen> parameters immediately after the opening
|$$| is sensed: ↑{*displaywidth} and ↑{*displayindent} are set to the
line width@$z$ and the shift amount@$s$ for line number |\prevgraf|${}+2$,
based on the current paragraph shape or hanging indentation. \ (Usually
|\displaywidth| is the same as ↑{*hsize}, and |\displayindent| is zero,
but these quantities can vary as described in Chapter@14.) \ Furthermore,
↑{*predisplaysize} is set to the effective width@$p$ of the line preceding
the display, as follows: If there was no previous line (i.e., if the |$$|
was preceded by ↑{*noindent} or by the closing |$$| of another display),
$p$@is set to $-16383.99999\pt$ (i.e., to the smallest legal dimension,
$-$↑{:maxdimen}). Otherwise \TeX\ looks inside the hbox that was formed
by the previous line, and sets $p$ to the position of the right edge of
the rightmost box inside that hbox, plus the indentation by which the
enclosing hbox has been moved right, plus two ems in the current font.
However, if this value of@$p$ depends on the fact that glue in that hbox
was stretching or shrinking---for example, if the ↑{*parfillskip} glue is
finite, so that the material preceding it has not been set at its natural
width---then $p$@is set to \hbox{|\maxdimen|}. \ (This doesn't happen
often, but it keeps \TeX\ machine independent, since $p$@never depends on
quantities that may be rounded differently on different computers.) \
Notice that \hbox{|\displaywidth|} and \hbox{|\displayindent|} are not
affected by \hbox{|\leftskip|} and \hbox{|\rightskip|}, but
\hbox{|\predisplaysize|} is. The values of\/ \hbox{|\displaywidth|},
\hbox{|\displayindent|}, and \hbox{|\predisplaysize|}
will be used by \TeX\ after the displayed formula has been
read, as explained below; your program can examine them and/or change
them, if you want the typesetting to be done differently.
\ddanger After a display has been read, \TeX\ converts it from a math list
to a horizontal list@$h$ in display style, as explained in
Appendix@G\null. An equation number, if present, is processed in text
style and put into an hbox@$a$ with its natural width. Now the fussy
processing begins: Let $z$, $s$, and@$p$ be the current values of\/\
\hbox{|\displaywidth|}, \hbox{|\displayindent|}, and
\hbox{|\predisplaysize|}. Let $q$ and@$e$ be zero if there is no equation
number; otherwise let@$e$ be the width of the equation number, and let@$q$
be equal to $e$@plus one quad in the symbols font (i.e., in
↑{*textfont}|2|). Let $w_0$ be the natural width of the displayed
formula@$h$. If $w_0+q\le z$, list@$h$@is packaged in an hbox@$b$ having
its natural width@$w_0$. But if $w_0+q>z$ (i.e., if the display is too
wide to fit at its natural width), \TeX\ performs the following ``↑{squeeze
routine}'': If $e\ne0$ and if there is enough shrinkability in the
displayed formula@$h$ to reduce its width to $z-q$, then list $h$ is
packaged in an hbox@$b$ of width@$z-q$. Otherwise $e$ is set to zero, and
list@$h$ is packaged in a (possibly overfull) hbox@$b$ of width $\min(w_0,z)$.
\ddanger (Continuation.) \ \TeX\ tries now to center the display without
regard to the equation number. But if such centering would make it too close
to that number (where ``too close'' means that the space between them is less
than the width@$e$), the equation is either centered in the remaining space
or placed as far from the equation number as possible. The latter alternative
is chosen only if the first item on list@$h$ is glue, since \TeX\ assumes that
such glue was placed there in order to control the spacing precisely.
But let's state the rules more formally: Let@$w$ be the width of box@$b$.
\TeX\ computes a displacement@$d$, to be used later when positioning box@$b$,
by first setting $d={1\over2}(z-w)$. If $e>0$ and if $d<2e$, then $d$@is
reset to ${1\over2}(z-w-e)$ or to zero, where zero is chosen if list@$h$
begins with a glue item.
\ddanger (Continuation.) \ \TeX\ is now ready to put things onto the current
vertical list, just after the material previously constructed for the
paragraph-so-far. First comes a ↑{penalty} item, whose cost is an integer
parameter called ↑{*predisplaypenalty}. Then comes glue. If $d+s\le p$,
or if there was a left equation number (|\leqno|), \TeX\ sets $g_a$ and
$g_b$ to glue items specified by the parameters ↑{*abovedisplayskip}
and ↑{*belowdisplayskip}, respectively; otherwise $g_a$ and $g_b$ become
glue items corresponding to ↑{*abovedisplayshortskip} and
↑{*belowdisplayshortskip}. \ [Translation: If the predisplaysize is short
enough so that it doesn't overlap the displayed formula, the glue above and
below the display will be ``short'' by comparison with the glue that is
used when there is no overlap.] \ If $e=0$ and if there is an |\leqno|,
the equation number is appended as an hbox by itself, shifted right@$s$ and
preceded by interline glue as usual; an infinite penalty is also appended,
to prevent a page break between this number and the display. Otherwise
a glue item@$g_a$ is placed on the vertical list.
\ddanger (Continuation.) \ Now comes the displayed equation itself. If
$e≠0$, the equation number box@$a$ is combined with the formula box@$b$ as
follows: Let@$k$ be a kern of width $z-w-e-d$. In the |\eqno| case, box@$b$
is replaced by an hbox containing $(b,k,a)$; in the |\leqno| case, box@$b$
is replaced by an hbox containing $(a,k,b)$, and $d$@is set to zero. In all
cases, box@$b$ is then appended to the vertical list, shifted right by@$s+d$.
\ddanger (Continuation.) \ The final task is to append the glue or the
equation number that follows the display. If there was an |\eqno| and if
$e=0$, an infinite penalty is placed on the vertical list, followed by the
equation number box@$a$ shifted right by $s+z$ minus its width, followed
by a penalty item whose cost is the value of\/ ↑{*postdisplaypenalty}.
Otherwise a penalty item for the \hbox{|\postdisplaypenalty|} is appended
first, followed by a glue item for@$g_b$ as specified above. \TeX\ now
adds@3 to |\prevgraf| and returns to horizontal mode, ready to resume the
paragraph.
\ddanger One consequence of these rules is that you can force an equation
number to appear on a line by itself by making its width zero, i.e.,
by saying either `|\eqno|↑{:llap}|{$|\<formula>|$}|' or
`|\leqno|↑{:rlap}|{$|\<formula>|$}|'. This makes $e=0$, and
the condition $e=0$ controls \TeX's positioning logic, as explained
in the rules just given.
\ddanger Plain \TeX\ sets |\predisplaypenalty=10000|, because fine
printers traditionally shun displayed formulas at the very top of a page.
You can change \hbox{|\predisplaypenalty|} and
\hbox{|\postdisplaypenalty|} if you want to encourage or discourage
page breaks just before or just after a display. For example,
`\hbox{|$$\postdisplaypenalty=|}\allowbreak\hbox{|-10000|\<formula>|$$|}'
will force a page break, putting the formula at the bottom line. It is better
to force a ↑{page break} this way than to say ↑{:eject} right after |$$...$$|;
such an eject (which follows the \hbox{|\belowdisplayskip|} glue below the
display) causes the page to be short, because it leaves unwanted glue at
the bottom.
\ddangerexercise Read the rules carefully and deduce the final position
of `$x=y$' in the formula
\begintt
$$\quad x=y \hskip10000pt minus 1fil \eqno(5)$$
\endtt
assuming that there is no hanging indentation. Also consider |\leqno|
instead of\/ |\eqno|.
\answer Assuming that |\hsize| is less than $10000\pt$, the natural width of
this equation will be too large to fit on a line; also, |\quad| specifies
glue at the left. Therefore `$x=y$' will appear exactly $1\rm\,em$ from
the left, and `(5)' will appear flush right. \ (The widths will satisfy
$w=z-q$, $d=0$, $k=q-e=18\rm\,mu$.)\par
In the case of\/ |\leqno|, `(5)' will appear flush left, followed by
one quad of space in |\textfont2|, followed by one quad of space in the
current text font, followed by `$x=y$'.
\ddanger \TeX\ also allows ``↑{alignment displays},'' which are not
processed in math mode because they contain no formulas at the outer
level. An alignment display is created@by
$$\displaybox{|$$|\<assignments>|\halign{|\<alignment>|}|\<assignments>|$$|}$$
where the \<assignments> are optional things like parameter changes that do not
produce any math lists. In such displays, the |\halign| is processed exactly
as if it had appeared in vertical mode, and it will construct a vertical
list@$v$ as usual, except that each row of the alignment will be shifted
right by the ↑{*displayindent}. After the alignment and the closing
assignments have been processed, \TeX\ will put a ↑{*predisplaypenalty}
item and some ↑{*abovedisplayskip} glue on the main vertical list, followed
by@$v$, followed by a ↑{*postdisplaypenalty} item and ↑{*belowdisplayskip}
glue. Thus, alignment displays are essentially like ordinary alignments,
except that they can interrupt paragraphs; furthermore, they are embedded in
glue and penalties just like other displays. The ↑{*displaywidth} and
↑{*predisplaysize} do not affect the result, although you could use
those parameters in your ↑{*halign}. An entire alignment display is considered
to be only three lines long, as far as ↑{*prevgraf} is concerned.
\subsection Multi-line displays. OK, the use of displayed formulas is
very nice. But when you try typing a lot of manuscripts you will run into
some displays that don't fit the simple pattern of a one-line formula with
or without an equation number. Plain \TeX\ provides special control
sequences that will cover most of the remaining cases.
Multi-line displays usually consist of several equations that should be
lined up by their `$=$'@signs, as in
$$\eqalign{X_1+\cdots+X_p&=m,\cr
Y_1+\cdots+Y_q&=n.\cr}$$
The recommended procedure for such a display is to use ↑{:eqalign},
which works with special markers |&| ↑(ampersand) and ↑{*cr} that we
have already encountered in connection with |\cases| and |\matrix|
in Chapter@18. Here's how to type this particular one:
\begintt
$$\eqalign{X_1+\cdots+X_p&=m,\cr
Y_1+\cdots+Y_q&=n.\cr}$$
\endtt
There can be any number of equations in an |\eqalign|; the general pattern is
$$\halign{\indent#\hfil\cr
|\eqalign{|&\<left-hand side$_1$>|&|\<right-hand side$_1$>|\cr|\cr
&\<left-hand side$_2$>|&|\<right-hand side$_2$>|\cr|\cr
\noalign{\vskip-2pt}
&\qquad\vdots\cr
&\<left-hand side$_n$>|&|\<right-hand side$_n$>|\cr}|\cr}$$
where each \<right-hand side> starts with the symbol on which you want
alignment to occur. For example, every right-hand side often begins
with an $=$@sign. The equations will be typeset in display style.
\exercise In practice, the left-hand sides of aligned formulas are often
blank, and the alignment is often done with respect to other symbols
as well as@$=$. For example, the following display is typical; see if you
can guess how the author typed it:
$$\eqalign{T(n)\le T(2↑{\lceil\lg n\rceil})
&\le c(3↑{\lceil\lg n\rceil}-2↑{\lceil\lg n\rceil})\cr
&<3c\cdot3↑{\lg n}\cr
&=3c\,n↑{\lg3}.\cr}$$ % from v2 p279
\answer (Note in particular that the final `|.|' comes {\sl before\/} the
final `|\cr|'.)
\begintt
$$\eqalign{T(n)\le T(2↑{\lceil\lg n\rceil})
&\le c(3↑{\lceil\lg n\rceil}-2↑{\lceil\lg n\rceil})\cr
&<3c\cdot3↑{\lg n}\cr
&=3c\,n↑{\lg3}.\cr}$$
\endtt
The result of\/ |\eqalign| is a vertically centered box. This makes it easy to
get a formula like
$$\left\{
\eqalign{\alpha&=f(z)\cr \beta&=f(z↑2)\cr \gamma&=f(z↑3)\cr}
\right\}\qquad\left\{
\eqalign{x&=\alpha↑2-\beta\cr y&=2\gamma\cr}\right\}.$$ % meaningless
You simply use |\eqalign| twice in the same line:
\begintt
$$\left\{
\eqalign{\alpha&=f(z)\cr \beta&=f(z↑2)\cr \gamma&=f(z↑3)\cr}
\right\}\qquad\left\{
\eqalign{x&=\alpha↑2-\beta\cr y&=2\gamma\cr}\right\}.$$
\endtt
\exercise Try your hand at the numbered two-line display % Polya/Szego V.29
$$\eqalign{P(x)&=a_0+a_1x+a_2x↑2+\cdots+a_nx↑n,\cr
P(-x)&=a_0-a_1x+a_2x↑2-\cdots+(-1)↑na_nx↑n.\cr}\eqno(30)$$
[{\sl Hint:\/} Use the fact that |\eqalign| produces a vertically centered
box; the equation number `(30)' is supposed to appear halfway between
the two lines.]
\answer |$$\eqalign{P(x)&=a_0+a_1x+a_2x↑2+\cdots+a_nx↑n,\cr|\par
| P(-x)&=a_0-a_1x+a_2x↑2-\cdots+(-1)↑na_nx↑n.\cr}\eqno(30)$$|\par
\exercise What happens if you forget the |&| in one equation of an |\eqalign|?
\answer Both sides of that equation are considered to be on the left, so
you get results that look like this:
$$\openup{-3pt}
\left\{\eqalign{\alpha&=f(z)\cr \beta&=f(z↑2)\cr \gamma=f(z↑3)\cr}
\right\}.$$
\danger Multi-line formulas sometimes fit together in odd ways, and you'll
find that every once in a@while you will want to move certain lines farther
apart or closer together. If you type `↑{:noalign}|{|↑{*vskip}\<glue>|}|'
after any |\cr|, \TeX\ will insert the given amount of extra glue just
after that particular line. For example,
\begintt
\noalign{\vskip3pt}
\endtt
will put $3\pt$ of additional space between lines. You can also change the
amount of space before the first line, in the same way.
The next level of complexity occurs when you have several aligned
equations with several equation numbers. Or perhaps some of the
lines are numbered and others are not:
$$\eqalignno{(x+y)(x-y)&=x↑2-xy+yx-y↑2\cr
&=x↑2-y↑2;&(4)\cr
(x+y)↑2&=x↑2+2xy+y↑2.&(5)\cr}$$
For this situation plain \TeX\ provides ↑{:eqalignno}; you use it like
|\eqalign|, but on each line that you want an equation number you add
`|&|\<equation number>' just before the |\cr|. The example above was
generated by
\begintt
$$\eqalignno{(x+y)(x-y)&=x↑2-xy+yx-y↑2\cr
&=x↑2-y↑2;&(4)\cr
(x+y)↑2&=x↑2+2xy+y↑2.&(5)\cr}$$
\endtt
Notice that the second |&| is omitted unless there's an equation number.
And there's also ↑{:leqalignno}, which puts equation numbers at the left.
In this case it is appropriate to move the `(4)' to the beginning
of its equation:
$$\leqalignno{(x+y)(x-y)&=x↑2-xy+yx-y↑2&(4)\cr
&=x↑2-y↑2;\cr
(x+y)↑2&=x↑2+2xy+y↑2.&(5)\cr}$$
Although the equation numbers appear at the left, you are still supposed to
input them at the right, just as you do with |\leqno|; in other words,
you should type
`|$$\leqalignno{(x+y)(x-y)&...&(4)\cr...}$$|' to get the previous display.
Caution: |\eqalignno| and |\leqalignno| both center the set of equations
without regard to the widths of the equation numbers. If the equations or
their numbers get too wide, they might overlap, yet no error message will
be given.
\exercise Typeset the following display: ↑(:gcd)
$$\leqalignno{\gcd(u,v)&=\gcd(v,u);&(9)\cr
\gcd(u,v)&=\gcd(-u,v).&(10)\cr}$$ % v2 p316
\answer |$$\leqalignno{\gcd(u,v)&=\gcd(v,u);&(9)\cr|\par
| \gcd(u,v)&=\gcd(-u,v).&(10)\cr}$$|
\exercise And here's another one to try, just to keep in practice: ↑(:int)
$$\vbox{
\eqalignno{\biggl(\int_{-\infty}↑\infty e↑{-x↑2}\,dx\biggr)↑2
&=\int_{-\infty}↑\infty\int_{-\infty}↑\infty
e↑{-(x↑2+y↑2)}\,dx\,dy\cr
&=\int_0↑{2\pi}\int_0↑\infty e↑{-r↑2}r\,dr\,d\theta\cr
&=\int_0↑{2\pi}\biggl(-{e↑{-r↑2}\over2}
\bigg\vert_{r=0}↑{r=\infty}\,\biggr)\,d\theta\cr
&=\pi.&(11)\cr}
}$$ % cf Joy of TeX
\answer |$$\eqalignno{\biggl(\int_{-\infty}↑\infty e↑{-x↑2}\,dx\biggr)↑2|\par
| &=\int_{-\infty}↑\infty\int_{-\infty}↑\infty|\par
| e↑{-(x↑2+y↑2)}\,dx\,dy\cr|\par
| &=\int_0↑{2\pi}\int_0↑\infty e↑{-r↑2}r\,dr\,d\theta\cr|\par
| &=\int_0↑{2\pi}\biggl(-{e↑{-r↑2}\over2}|\par
| \bigg|\vrt|_{r=0}↑{r=\infty}\,\biggr)\,d\theta\cr|\par
| &=\pi.&(11)\cr}$$| ↑(:bigg)
\danger Although |\eqalign| and |\eqalignno| look nearly the same, there's
really a fundamental distinction between them: |\eqalign| makes a single,
vertically centered box, which is no wider than it needs to be; but
|\eqalignno| generates a set of lines that have the full display width
(reaching all the way to both margins). Thus, for example, you can use
|\eqalign| several times in a display, but |\eqalignno| can appear only
once. If you try to use ↑{*eqno} in conjunction with |\eqalign|,
you get a decent result, but if you try to use |\eqno| in connection
with |\eqalignno| you'll get some sort of weird error message(s).
\ddanger The definitions in Appendix@B reveal why |\eqalign| and |\eqalignno|
behave differently: |\eqalign| is an
abbreviation for ↑{*vcenter}|{|↑{*halign}|{...}}|, while
|\eqalignno| is an abbreviation for |\halign to\the\displaywidth{...}|;
thus |\eqalignno| generates an ``↑{alignment display}.''
\ddanger This difference between |\eqalign| and |\eqalignno| has two
interesting consequences. \ (1)@It's impossible to break an |\eqalign|
between pages, but an |\eqalignno| can be broken. In fact, you can
{\sl force\/} a ↑{page break} after a particular line if you insert
`↑{:noalign}|{|↑{:break}|}|'
after the |\cr| for that line; and you can {\sl prohibit\/} such a break
if you insert `|\noalign{|↑{:nobreak}|}|'. You can prohibit {\sl all\/} breaks
in an |\eqalignno| if you enclose the whole works in a ↑{*vbox}:
\begintt
$$\vbox{\eqalignno{...}}$$
\endtt
(2) You can also insert a line of text between two equations, without
losing the alignment. For example, consider the two displays
$$\eqalignno{x&=y+z\cr
\noalign{\hbox{and}}
x↑2&=y↑2+z↑2.\cr}$$
These were actually generated as a single display by typing
\begintt
$$\eqalignno{x&=y+z\cr
\noalign{\hbox{and}}
x↑2&=y↑2+z↑2.\cr}$$
\endtt
Therefore the fact that their $=$ signs line up is not just a lucky
coincidence. Sometimes you will want to adjust the spacing above or below
such a line of inserted text, by putting a |\vskip| or two inside of the
|\noalign{...}|. Incidentally, this example also shows that it is
possible to use |\eqalignno| without giving any equation numbers.
\ddangerexercise What happens if\/ |\eqalign| is substituted for
|\eqalignno| in this last example?
\answer You get the displayed box
$$\eqalign{x&=y+z\cr
\noalign{\hbox{and}}
x↑2&=y↑2+z↑2.\cr}$$
Reason: The `and' occurs at the left of the |\eqalign| box, not at the
left of the whole display, and the |\eqalign| box is centered as usual.
\ddangerexercise Our friend Ben ↑{User} got into trouble again when he tried to
move an equation number up higher than its usual position, by typing this:
↑(*raise)
\begintt
$$\eqalignno{...&\raise6pt\hbox{(5)}\cr}$$
\endtt
What was his oversight, and what could he have done instead?
\answer By raising the equation number, he increased the line height,
so \TeX\ put extra space between that line and the previous line
when it calculated the inter-line glue. If he had said
`↑{:smash}|{\raise...}|', he wouldn't have had that problem.
\danger For other types of displays, plain \TeX\ provides ↑{:displaylines},
which lets you display any number of formulas in any way you want,
without any alignment. The general form is
$$\halign{\indent\hfil#\hfil\cr
|$$\displaylines{|&\<displayed formula$_1$>|\cr|\cr
&\<displayed formula$_2$>|\cr|\cr
\noalign{\vskip-2pt}
&\qquad\vdots\cr
&\<displayed formula$_n$>|\cr}$$|\cr}$$
Each formula will be centered, because |\displaylines| puts ↑{*hfil} at
the left and the right of each line; you can override this centering to
get things flush left or flush right by inserting ↑{*hfill}, which takes
precedence over |\hfil|.
\dangerexercise Use |\displaylines| to typeset the three-line display
$$\displaylines{\hfill x\equiv x;\hfill\llap{(1)}\cr
\hfill\hbox{if}\quad x\equiv y\quad\hbox{then}\quad
y\equiv x;\hfill\llap{(2)}\cr
\hfill\hbox{if}\quad x\equiv y\quad\hbox{and}\quad
y\equiv z\quad\hbox{then}\quad
x\equiv z.\hfill\llap{(3)}\cr}$$
\answer |$$\displaylines{\hfill x\equiv x;\hfill\llap{(1)}\cr|\par
| \hfill\hbox{if}\quad x\equiv y\quad\hbox{then}\quad|\par
| y\equiv x;\hfill\llap{(2)}\cr|\par
| \hfill\hbox{if}\quad x\equiv y\quad\hbox{and}\quad|\par
| y\equiv z\quad\hbox{then}\quad|\par
| x\equiv z.\hfill\llap{(3)}\cr}$$|\par\medskip\noindent
There's also a trickier solution, which begins with
\begintt
$$\displaylines{x\equiv x;\hfil\llap{(1)}\hfilneg\cr
\endtt
\danger If you look closely at the multi-line displays in this chapter,
you'll see that the baselines are farther apart than they are in normal
text; mathematics publishers generally do this in order to make the
displays easier to read. In accordance with this tradition, |\eqalign| and
its relatives automatically increase the ↑{*baselineskip}. If@you are
making a multi-line display with \TeX's primitive ↑{*halign} command,
instead of using one of the plain \TeX\ macros, you might want to make
this same baseline adjustment, and you can do it easily by saying
`|$$\openup{3pt}\halign{...}$$|'. The ↑{:openup} macro increases ↑{*lineskip}
and ↑{*lineskiplimit} as well as \hbox{|\baselineskip|}. Since |$$...$$|
acts as a ↑{group}, the effect of\/ |\openup| will disappear when the
display is finished. You can also say `|$$\openup{-3pt}\eqalignno{...}$$|'
if you don't want |\eqalignno| to open up the lines in some display.
\ddanger After you use |\openup{3pt}| as just suggested, the baselineskip
distance will be $15\pt$ instead of its usual $12\pt$. Thus, the baseline
of the text line that immediately precedes the display will be $15\pt$
above the topmost baseline of the display, plus the ↑{*abovedisplayskip}.
But when the paragraph resumes, its next baseline will be only $12\pt$
below the bottom baseline of the display, plus the ↑{*belowdisplayskip},
because the |\baselineskip| parameter will have reverted to its normal
value. The |\eqalignno| and |\displaylines| macros say
`|\noalign{\vskip-3pt}|' before their first lines, in order to compensate
for this difference.
\subsection Long formulas. Our discussion of mathematics typing is almost
complete; we need to deal with just one more problem: What should be
done when a formula is so long that it doesn't fit on a single line?
For example, suppose that you encounter the equation
$$\hfuzz=20pt % overfull box tolerated here
\sigma(2↑{34}-1,2↑{35},1)=
-3+(2↑{34}-1)/2↑{35}+2↑{35}\!/(2↑{34}-1)+7/2↑{35}(2↑{34}-1)
-\sigma(2↑{35},2↑{34}-1,1).$$ % from v2, 1st ed, p76
You'll have to break it up somehow; \TeX\ has done its best to squeeze
everything together by shrinking the spaces next to the $+$ and@$-$ signs
to zero, but still the line has come out overfull.
Let's try to break that equation just before the `$+7$'. One common way to
do this is to type
\begintt
$$\eqalign{\sigma(2↑{34}-1,2↑{35},1)
&=-3+(2↑{34}-1)/2↑{35}+2↑{35}\!/(2↑{34}-1)\cr
&\qquad+7/2↑{35}(2↑{34}-1)-\sigma(2↑{35},2↑{34}-1,1).\cr}$$
\endtt
which yields
$$\eqalign{\sigma(2↑{34}-1,2↑{35},1)
&=-3+(2↑{34}-1)/2↑{35}+2↑{35}\!/(2↑{34}-1)\cr
&\qquad+7/2↑{35}(2↑{34}-1)-\sigma(2↑{35},2↑{34}-1,1).\cr}$$
The idea is to treat a long one-line formula as a two-line formula,
using |\qquad| on the second line so that the second part of the formula
appears well to the right of the $=$@sign on the first line.
\exercise Explain how to deal with the following display. % v2 p107
$$\eqalignno{x_nu_1+\cdots+x_{n+t-1}u_t
&=x_nu_1+(ax_n+c)u_2+\cdots\cr
&\qquad+\bigl(a↑{t-1}x_n+c(a↑{t-2}+\cdots+1)\bigr)u_t\cr
&=(u_1+au_2+\cdots+a↑{t-1}u_t)x_n+h(u_1,\ldots,u_t).
\quad&(47)\cr}$$
\answer |$$\eqalignno{x_nu_1+\cdots+x_{n+t-1}u_t|\par
| &=x_nu_1+(ax_n+c)u_2+\cdots\cr|\par
| &\qquad+\bigl(a↑{t-1}x_n+c(a↑{t-2}+\cdots+1)\bigr)u_t\cr|\par
| &=(u_1+au_2+\cdots+a↑{t-1}u_t)x_n+h(u_1,\ldots,u_t).|\par
| \quad&(47)\cr}$$|\par\noindent
You weren't expected to insert the `|\quad|' on the last line; such
refinements usually can't be anticipated until you see the first proofs.
But without that |\quad| the `(47)' would occur half a quad closer to the
formula.
\danger It's quite an art to decide how to ↑{break long displayed formulas}
into several lines; \TeX\ never attempts to break them, because no set of
rules is really adequate. The author of a mathematical manuscript is
generally the best judge of what to do, since break positions depend on
subtle factors of mathematical exposition. For example, it is often
desirable to emphasize some of the symmetry or other structure that
underlies a formula, and such things require a solid understanding of
exactly what is going on in that formula.
\begingroup\ninepoint
\danger Nevertheless, it is possible to state a few rules of thumb about
how to deal with long formulas in displays, since there are some
principles that the best mathematical typesetters tend to follow:\enddanger
\smallskip
\textindent{a)}Although formulas within a paragraph always break {\sl after\/}
binary operations and relations, displayed formulas always break {\sl before\/}
binary operations and relations. Thus, we didn't end the first line of
our $\sigma(\,\ldots\,)$ example with `|(2↑{34}-1)+|'; we ended it with
`|(2↑{34}-1)|' and began the second line with `|+|'.
\smallskip
\textindent{b)}When an equation is broken before a binary operation, the second
line should start at least two quads to the right of where the innermost
subformula containing that binary operation begins on the first line.
For example, if you wish to break
$$\displaybox{|$$\sum_{0<k<n}\left(|\<formula$_1$>|+|%
\<formula$_2$>|\right)$$|}$$
at the plus sign between \<formula$_1$> and \<formula$_2$>, it is almost
mandatory to have the plus sign on the second line appear somewhat to the
right of the large left parenthesis that corresponds to `|\left(|'.
\endgroup
\danger In the example just considered, special care is needed to break the
formula into two lines, because ↑{*left} and ↑{*right} delimiters cannot be
used in isolation; you can't have only |\left| in one line of a formula
and only |\right| in the second. Furthermore, you'll want the two delimiters
to be of the same size, even though they occur in different lines. The best
solution is usually to choose the delimiter size yourself; for example,
you could type
$$\halign{\indent#\hfil\cr
|$$\eqalign{\sum_{0<k<n}\biggl(&|\<formula$_1$>|\cr|\cr
| &\qquad+|\<formula$_2$>|\biggr)\cr}$$|\cr}$$
if\/ ↑{:bigg} delimiters are best. Notice that the |&|@markers don't occur
at $=$@signs in this example, they just mark a point of alignment.
\danger There's another way to break long formulas, sometimes called the
{\sl↑{two-line}\/} form. The idea is to put the first part of the formula
almost ↑{flush left}, and to put the second part almost ↑{flush right},
where ``almost flush'' means ``one quad away.'' Thus, the two-line form of
the long $\sigma(\,\ldots\,)$ equation considered earlier is
$$\displaylines{\quad\sigma(2↑{34}-1,2↑{35},1)
=-3+(2↑{34}-1)/2↑{35}+2↑{35}\!/(2↑{34}-1)\hfill\cr
\hfill{}+7/2↑{35}(2↑{34}-1)-\sigma(2↑{35},2↑{34}-1,1).\quad\cr}$$
It isn't difficult to get this two-line effect with ↑{:displaylines}:
\begintt
$$\displaylines{\quad\sigma(2↑{34}-1,2↑{35},1)
=-3+(2↑{34}-1)/2↑{35}+2↑{35}\!/(2↑{34}-1)\hfill\cr
\hfill{}+7/2↑{35}(2↑{34}-1)-\sigma(2↑{35},2↑{34}-1,1).\quad\cr}$$
\endtt
An extra `|{}|' was typed on the second line here so that \TeX\ would know
that the `|+|' is a binary operation. The two-line form is especially
recommended for equations that have a long left-hand side; in that case the
break generally comes just before the $=$@sign.
\dangerexercise Typeset the following display:
$$\displaylines{\quad\sum_{1\le j\le n}{1\over
(x_j-x_1)\ldots(x_j-x_{j-1})(x-x_j)(x_j-x_{j+1})
\ldots(x_j-x_n)}\hfill\cr
\hfill={1\over(x-x_1)\ldots(x-x_n)}.\quad(27)\cr}$$ % v2 p80
\answer |$$\displaylines{\quad\sum_{1\le j\le n}{1\over|\par
| (x_j-x_1)\ldots(x_j-x_{j-1})(x-x_j)(x_j-x_{j+1})|\par
| \ldots(x_j-x_n)}\hfill\cr|\par
| \hfill={1\over(x-x_1)\ldots(x-x_n)}.\quad(27)\cr}$$|
\ddangerexercise If it is necessary to typeset a huge fraction like
$$\def\\#1;{(#1;q↑2)_\infty}
q↑{{1\over2}n(n+1)}\\ea;\\eq/a;\\caq/e;\\cq↑2/ae;
\over(e;q)_\infty(cq/e;q)_\infty$$
in a single narrow column, you might have to break up the numerator and
resort to
$$\display\def\\#1;{(#1;q↑2)_\infty}
\displaystyle{q↑{{1\over2}n(n+1)}\\ea;\\eq/a;\qquad\atop
\hfill\\caq/e;\\cq↑2/ae;}
\over(e;q)_\infty(cq/e;q)_\infty$$
How would you specify the latter fraction to \TeX?
% cf SIAM J Math Anal 7 (1976) p333; even longer ones appear on p334
\answer |$$\def\\#1;{(#1;q↑2)_\infty} % to save typing|\par
|\displaystyle{q↑{{1\over2}n(n+1)}\\ea;\\eq/a;\qquad\atop|\par
| \hfill\\caq/e;\\cq↑2/ae;}|\par
|\over(e;q)_\infty(cq/e;q)_\infty$$|
\endchapter
When a formula is too long for the page-width
and has to be broken into successive lines
(and we are now, of course, speaking of displayed formulae),
it should be broken, if possible, at the end of a natural `phrase';
if, for example, it is a much-bracketed formula,
it should be broken at the end of one of the major brackets
and not at an inner symbol.
This natural phrasing (as in music or speech)
makes for intelligibility between writer and reader
and should not be left to the compositor.
An author, when he finds himself writing a longish formula,
should indicate a convenient point of fracture in case of need.
\author ↑{CHAUNDY}, ↑{BARRETT}, and ↑{BATEY}, %
{\sl The Printing of Mathematics\/} (1954) % p38
\bigskip
Some authors use display with discretion,
some run even extremely long, complicated equations into the text,
while others tend to display every equation in the paper.
The tendency to overdisplay is probably more predominant
than the tendency to underdisplay;
for this reason it is possible for the copy editor to shorten
(and even improve) papers by running displayed material into text. $\ldots$
On the other hand, there are occasions when the copy editor needs
to suggest the display of complicated expressions that have been run into text,
particularly when it would involve a bad break at the end of a text line.
\author ELLEN ↑{SWANSON}, {\sl Mathematics into Type\/} (1971) % p41
\eject
\beginchapter Chapter 20. Definitions\\(also called Macros)
You can often save time typing math formulas by letting ↑{control sequences}
stand for constructions that occur frequently in a particular manuscript.
For example, if some document uses the vector `$(x↓1,\ldots,x↓n)$' a lot,
you can type
\begintt
\def\xvec{(x_1,\ldots,x_n)}
\endtt
and |\xvec| will henceforth be an abbreviation for `|(x_1,\ldots,x_n)|'.
Complex displays \rlap{like}
$$\def\xvec{(x_1,\ldots,x_n)}
\sum_{\xvec\ne(0,\ldots,0)} \bigl(f\xvec+g\xvec\bigr)$$
can then be typed simply as
\begintt
$$\sum_{\xvec\ne(0,\ldots,0)} \bigl(f\xvec+g\xvec\bigr)$$
\endtt
instead of in a tedious long form. By ↑{defining a control sequence}
like |\xvec|, you not only cut down on the number of keystrokes that you need
to make, you also reduce your chances of introducing typographical errors
and inconsistencies. ↑(abbreviations, see macros)
Of course, you usually won't be making a definition just to speed up the
typing of one isolated formula; that doesn't gain anything, because time goes by
when you're deciding whether or not to make a definition, and when you're
typing the definition itself. The real payoff comes when some cluster of
symbols is used dozens of times throughout a manuscript. A wise typist will
look through a document before typing anything, thereby getting a feeling
for what sorts of problems will arise and what sorts of definitions will
be helpful. For example, Chapter@16 recommends that the control sequence
|\Ahat| be defined at the beginning of any manuscript that uses the
symbol@$\skew5\hat A$ more than once.
Abbreviations like |\xvec| turn out to be useful in many applications of
computers, and they have come to be known as {\sl↑{macros}\/} because they
are so powerful; one little macro can represent an enormous amount of
material, so it has a sort of macroscopic effect. System programs like
\TeX\ that are designed to deal with macro definitions are said to {\sl
expand\/} the user's macros; for example, |\xvec| expands into
|(x_1,\ldots,x_n)|, and ↑{:ldots} in turn is a macro that expands into
|\mathinner{\ldotp\ldotp\ldotp}|. Thus, |\xvec| is actually an
abbreviation for `|(x_1,\mathinner{\ldotp\ldotp\ldotp},x_n)|'. \ (The
expansion stops here, because ↑{*mathinner} is a primitive control
sequence of \TeX, and because |\ldotp| has been defined with
↑{*mathchardef}; thus \hbox{|\mathinner|} and |\ldotp| are not macros.)
\TeX\ users generally build up their own personal ↑{library of macros} for
things that they want to do in different documents. For example, it is common
to have a file called |macros.tex| that contains definitions of your favorite
special control sequences, perhaps together with commands that load your
favorite special fonts, etc. If you begin a document with the command
\begintt
\input macros
\endtt
then \TeX\ will read all those definitions, ↑(*input)
saving you all the trouble of retyping them. Of course, \TeX's memory is
limited, and it takes time to read a file, so you shouldn't put thousands
of definitions into |macros.tex|. A large collection of macro definitions
(e.g., the set of definitions in Appendix@B) is called a {\sl↑{format}\/}
(e.g., ``plain \TeX\ format''); \TeX\ has a special way to input
a format at high speed, assuming that the format doesn't change very often.
The |\xvec| and |\Ahat| examples apply to math formulas, but you can make
good use of macro definitions even when you aren't doing any math at all.
For example, if you are using \TeX\ for ↑{business correspondence},
you can have a |\yours| macro that stands for `Sincerely yours, A.@U.
↑{Thor}'. If you often write form letters you can have macros that
generate entire sentences or paragraphs or groups of paragraphs.
The ↑{Internal Revenue Service} could, for example, make use of macros
like this:
\begintt
\def\badcheck{A penalty has been added because your
check to us was not honored by your bank.\par}
\def\cheater{A penalty of 50\% of the underpaid tax
has been added for fraud.\par}
\endtt
Simple macro definitions, like these, start with `|\def|'; then comes the
control sequence name, e.g., `|\badcheck|'; and then comes the replacement
text enclosed in `|{|' and@`|}|'. The ↑{braces} do not represent
↑{grouping} in this case; they simply show the extent of the replacement
text in the definition. You could, of course, define a macro that
includes actual braces in its replacement text, as long as those braces
match each other properly. For example, `|\def\xbold{{\bf x}}|'
makes |\xbold| an abbreviation for `|{\bf x}|'.
\exercise
Write a |\punishment| macro that prints 100 lines containing the message
`I must not talk in class.' \ [{\sl Hint:\/} First write a macro |\mustnt|
that prints the message once; then write a macro |\five| that prints it
five times.] \checkequals\punishexno{\count\exno}
\answer |\def\mustnt{I must not talk in class.\par}|\par
|\def\five{\mustnt\mustnt\mustnt\mustnt\mustnt}|\par
|\def\twenty{\five\five\five\five}|\par
|\def\punishment{\twenty\twenty\twenty\twenty\twenty}|\par
\smallskip\noindent Solutions to more complicated problems of this type are
discussed later.
\dangerexercise What is the expansion of\/ |\puzzle|, given the following
definitions?
\begintt
\def\a{\b}
\def\b{A\def\a{B\def\a{C\def\a{\b}}}}
\def\puzzle{\a\a\a\a\a}
\endtt
\answer |ABCAB|. \ (The first |\a| expands into |A\def\a{B...}|; this redefines
|\a|, so the second |\a| expands into |B...|, etc.) \ At least, that's what
happens if\/ |\puzzle| is encountered when \TeX\ is building a list. But if
|\puzzle| is expanded in an ↑{*edef} or ↑{*message} or something like that,
we will see later that the interior |\def| commands are not performed
while the expansion is taking place, so the result is an infinite string
\begintt
A\def\a{B\def\a{C\def\a{A\def\a{B\def\a{C\def\a{A...
\endtt
which causes \TeX\ to abort because the program's input stack is finite.
This example points out that a control sequence (e.g., |\b|) need not be
defined when it appears in the replacement text of a definition. The example
also shows that \TeX\ doesn't expand a macro until it needs to.
\danger As soon as you get the hang of simple macros like those illustrated
above, you will probably begin to think, ``Boy, wouldn't it be nice if I could
have a macro in which some of the text in the expansion is changeable? I'd like
to be able to stick different things into the middle of that text.'' Well,
\TeX\ has good news for you: Control sequences can be defined in terms of
{\sl↑{parameters}}, and you can supply {\sl↑{arguments}\/} that will be
substituted for the parameters.
\danger For example, let's consider |\xvec| again. Suppose that you not only
refer to `$(x_1,\ldots,x_n)$', but you also make frequent use of
`$(y_1,\ldots,y_n)$' and other similar things. Then you might want to type
\begintt
\def\vec#1{(#1_1,\ldots,#1_n)}
\endtt
\def\vec#1{(#1_1,\ldots,#1_n)}%
after which |\vec x| will produce `$\vec x$' and |\vec y| will produce
`$\vec y$'. The symbol |#1| ↑(sharpsign) stands for the first parameter to
the macro, and when you say `|\vec|@|x|' the |x| is a so-called argument that
will be inserted in place of the |#1|'s in the replacement text. In this
case the argument consists of a single letter, |x|. You can also say
|\vec\alpha|, in which case the argument will be the control sequence
↑{:alpha}, and the result will be `$\vec\alpha$'. If you want the argument
to contain more than one symbol or control sequence, you can simply enclose
it in ↑{braces}; for example, |\vec{x'}| yields $\vec{x'}$. The argument
in this case is |x'| (without the braces). Incidentally, if you say
|\vec{{x'}}|, you get $\vec{{x'}}$; the reason is that only one pair of
braces is stripped off when the argument is collected, and
$({x'}_1,\ldots,{x'}_n)$ is what you get from
|({x'}_1,\ldots,{x'}_n)| in math mode, according to the rules
of Chapter@16. ↑(apostrophe)
\dangerexercise Continuing this example, what is the result of
|$\vec{\bf x}$|?
\answer \def\vec#1{(#1_1,\ldots,#1_n)}$\vec{\bf x}$. Note that the
subscript@|n| is bold here, because the expansion |(\bf x_1,\ldots,\bf x_n)|
doesn't ``turn off'' ↑{:bf}. To prevent this, one should write
|\vec{{\bf x}}|; or (better), |\vec\xbold|, in conjunction with
|\def\xbold{{\bf x}}|.
\danger The notation `|#1|' suggests that there might be an opportunity to have
more than one parameter, and indeed there is. You can write, for example,
\begintt
\def\vec#1#2{(#1_1,\ldots,#1_#2)}
\endtt
\def\vec#1#2{(#1_1,\ldots,#1_#2)}%
after which `|\vec xn|' would be the proper protocol for `$\vec xn$'. There can
be as many as nine parameters, |#1| to@|#9|, and when you use them you must
number them@in order. For example, you can't use |#5| in a definition unless
the previous parameter in@that definition was called |#4|. \ (This restriction
applies only to the initial statement of parameters, before the replacement
text starts; the stated parameters can be used any number of times, in any
order, in the replacement text itself.)
\danger A control sequence has only one definition at a
time, so the second definition of\/ |\vec| would supersede the first one if
both had appeared in the same document. Whenever \TeX\ encounters a macro
that it wants to expand, it uses the most recent definition. However,
definitions are ↑{local} to the group that contains them; old definitions
will be restored in the usual way when a ↑{group} ends.
\danger Caution: When you define a macro with simple parameters, as in
these examples, you must be careful not to put blank spaces before the
`|{|' that begins the replacement text. For example, `|\def\vec #1 #2 {...}|'
will not give the same result as `|\def\vec#1#2{...}|', because the spaces
after |#1| and@|#2| tell \TeX\ to look for arguments that are followed by
spaces. \ (Arguments can be ``delimited'' in a fairly general way, as
explained below.) \ But the space after |\vec| is optional, as usual,
because \TeX\ always disregards spaces after control words. After
you have said `|\def\vec#1#2{...}|', you are allowed to put spaces
between the arguments (e.g., `|\vec x n|'), because \TeX\ doesn't
use single spaces as undelimited arguments.
\danger The following exercise is particularly recommended for people who
want to learn to write \TeX\ macros. Even if you have gotten into the
dangerous habit of skimming other exercises, you should try your hand
at this one.
\dangerexercise Extending exercise 20.\punishexno, write a ``generalized
punishment'' macro that has two parameters, so that |\punishment{run}{the
halls}| will produce 100 paragraphs that say `I@must not run in the halls.'
\answer The catch is that the parameters have to percolate down to the
|\mustnt| macro, if you extend the previous answer:
\begintt
\def\mustnt#1#2{I must not #1 in #2.\par}
\def\five#1#2{\mustnt{#1}{#2}...\mustnt{#1}{#2}}
\def\twenty#1#2{\five{#1}{#2}...\five{#1}{#2}}
\def\punishment#1#2{\twenty{#1}{#2}...\twenty{#1}{#2}}
\endtt
When you pass parameters from one macro to another in this way, you need to
enclose them in braces as shown. But actually this particular solution
punishes \TeX\ much more than it needs to, because it takes a lot of
time to copy the parameters and read them again and again. There's a
much more efficient way to do the job, by defining control sequences:
\begintt
\def\mustnt{I must not \doit\ in \thatplace.\par}
\def\punishment#1#2{\def\doit{#1}\def\thatplace{#2}%
\twenty\twenty\twenty\twenty\twenty}
\endtt
and by defining |\five| and |\twenty| without parameters as before.
You can also delve more deeply into \TeX nicalities, constructing solutions
that are more efficient yet; \TeX\ works even faster when macros
communicate with each other via ↑{boxes}.
↑(efficient macros) ↑(communication between macros)
For example,
\begintt
\def\mustnt{\copy0 }
\def\punishment#1#2{\setbox0=
\vbox{\strut I must not #1 in #2.\strut}%
\twenty\twenty\twenty\twenty\twenty}
\endtt
sets 100 identical paragraphs at high speed, because \TeX\ has to
process the paragraph and break it into lines only once. It's much faster
to ↑{copy a box} than to build it up from scratch. \ (The ↑{struts} in
this example keep the interbaseline distances correct between boxed
paragraphs, as explained in Chapter@12.)
\ninepoint % the rest of this chapter is all dangerous
\ddanger \TeX\ also allows you to define macros whose parameters are delimited
in quite a general way; you needn't always enclose arguments in braces.
For example,
\begintt
\def\cs #1. #2\par{...}
\endtt
defines a control sequence |\cs| with two parameters, and its two arguments
will be determined as follows: |#1| will consist of all tokens between
|\cs| and the next subsequent appearance of `|.|\]' (period and space);
|#2| will consist of all tokens between that `|.|\]' and the next
|\par| token. \ (The ↑{*par} might be given explicitly, or it might be
generated by a blank line as explained in Chapter@8.) \ For example, when
\TeX\ expands
\begintt
\cs You owe \$5.00. Pay it.\par
\endtt
the first argument is `|You owe \$5.00|' and the second is `|Pay it.|'.
Notice that the period in `|\$5.00|' doesn't stop |#1|, in this example,
because \TeX\ keeps going until finding a period that is followed immediately
by a space.
\ddanger Furthermore, an argument will not stop when its delimiter is
enclosed in braces, because that would produce unbalanced braces. For example,
in the slightly revised definition
\begintt
\def\cs #1.#2\par{...}
\endtt
the first argument is now delimited by a single period, so |#1| would be
`|You owe \$5|' and the |#2| would be `|00. Pay it.|' if\/ |\cs| were
invoked as above. But
\begintt
\cs You owe {\$5.00}. Pay it.\par
\endtt
satisfactorily hides the first period, making it part of argument |#1|,
which becomes \hbox{`|You owe {\$5.00}|'}.
\ddanger If you are designing a format for mathematical papers, you will
probably want to include a macro for the statement of ↑{theorems},
definitions, lemmas, corollaries, and such things. For example, you might
want to typeset a statement like\enddanger
\proclaim Theorem 1. \TeX\ has a powerful macro capability.
\noindent (followed by a blank line). In fact, plain \TeX\ includes a
↑{:proclaim} macro that does just that; its definition is
\begintt
\def\proclaim #1. #2\par{\medbreak
\noindent{\bf#1.\enspace}{\sl#2}\par\medbreak}
\endtt
↑(enunciations, see proclaim) ↑(enspace)
so the arguments are delimited exactly as in our |\cs| example. The
replacement text here uses |\medbreak| to separate the proclaimed
paragraph from what precedes and follows; the title of the proclamation is
set in bold face type, while the text itself is set slanted. \ (The
actual definition of\/ |\proclaim| in Appendix@B is not quite the same as
this; the final |\medbreak| has been modified so that a break between
pages will be discouraged immediately following the statement of a
theorem. Hence a short theorem will tend to appear at the top of a page
rather than at the bottom.)
\ddanger By making changes to the |\proclaim| macro, you can change the
format of all the proclamations in your paper, without changing the text
of the paper itself. For example, you could produce something like\enddanger
\medbreak
\noindent T{\sevenrm HEOREM} 1:\enspace
{\it\TeX\ has a powerful macro capability.}
\medbreak\noindent
by making simple alterations to the replacement text of\/ |\proclaim|,
assuming that you have a ``↑{caps and small caps}'' font. \TeX\ is
intended to support higher-level languages for composition in which all of
the control sequences that a user actually types are macros rather than
\TeX\ primitives. The ideal is to be able to describe important classes of
documents in terms of their components, without mentioning actual fonts or
point sizes or details of spacing; a single style-independent document
can then be set in many different styles.
\ddanger Now that we have seen a number of examples, let's look at the
precise rules that govern \TeX\ macros. Definitions have the general form
$$\displaybox{|\def|\<control sequence>\<parameter text>|{|%
\<replacement text>|}|}$$
where the \<parameter text> contains no ↑{braces}, and where all occurrences
of |{| and |}| in the \<replacement text> are properly nested. Furthermore
the |#| symbol has a special significance: In the \<parameter text>, the
first appearance of |#| must be followed by@|1|, the next by@|2|, and
so on; up to nine |#|'s are allowed. In the \<replacement text> each@|#|
must be followed by a digit that appeared after@|#| in the \<parameter text>,
or else the@|#| should be followed by another@|#|. The latter case stands
for a single@|#| token when the macro is expanded; the former case stands
for insertion of the corresponding argument.
\ddanger For example, let's consider a ``random'' definition that doesn't
do anything useful except that it does exhibit \TeX's rules. The definition
\begintt
\def\cs AB#1#2C$#3\$ {#3{ab#1}#1 c##\x #2}
\endtt
says that the control sequence |\cs| is to have a parameter text consisting of
nine tokens
$$\displaybox{|A|$_{11}$, \ |B|$_{11}$, \ |#1|, \ |#2|, \ |C|$_{11}$, \
|$|$_3$, \ |#3|, \ \cstok{\char`$}, \ \]$_{10}$}$$
(assuming the ↑{category codes} of plain \TeX), and a replacement text
of twelve tokens
$$\displaybox{|#3|, \ |{|$_1$, \ |a|$_{11}$, \ |b|$_{11}$, \ |#1|, \
|}|$_2$, \ |#1|, \ \]$_{10}$, \ |c|$_{11}$, \ |#|$_6$, \ \cstok{x}, \ |#2|.}$$
Henceforth when \TeX\ reads the control sequence |\cs| it will expect that
the next two tokens will be |A|$_{11}$ and |B|$_{11}$ (otherwise you will
get the error message `|Use| |of| |\cs| |doesn't| |match| |its|
|definition|'); then comes argument@|#1|, followed by argument@|#2|,
then@|C|$_{11}$, then@|$|$_3$, then argument@|#3|, then |\$|, and finally
a space token. It is customary to use the word ``argument'' to mean the
string of tokens that gets substituted for a parameter; parameters appear
in a definition, and arguments appear when that definition is used. \ (For
the purposes of these rules, we are extending Chapter@7's definition of
↑{token}: In addition to control sequences and (character code, category
code) pairs, \TeX\ also recognizes ``↑{parameter tokens},'' denoted here
by |#1|@to@|#9|. Parameter tokens can appear only in token lists for macros.)
\ddanger How does \TeX\ determine where an argument stops, you ask.
Answer: There are two cases. A {\sl↑{delimited parameter}\/} is followed
in the \<parameter text> by one or more non-parameter tokens, before
reaching the end of the parameter text or the next parameter token;
in this case the corresponding argument is the shortest (possibly empty)
sequence of tokens with properly nested |{...}| groups that is followed in
the input by this particular list of non-parameter tokens. \ (Category
codes and character codes must both match, and control sequence names
must be the same.) \ An {\sl↑{undelimited parameter}\/} is followed immediately
in the \<parameter text> by a parameter token, or it occurs at the very end
of the parameter text; in this case the corresponding argument is the
next nonblank token, unless that token is `|{|', when the argument will
be the entire |{...}| group that follows. In both cases, if the argument
found in this way has the form `|{|\<nested tokens>|}|', where \<nested
tokens> stands for any sequence of tokens that is properly nested with
respect to braces, the outermost braces enclosing the argument are removed
and the \<nested tokens> will remain. For example, let's continue with
|\cs| as defined above, and suppose that the subsequent text contains
\begintt
\cs AB {\Look}C${And\$ }{look}\$ 5.
\endtt
Argument |#1| will be the token \cstok{Look}, since |#1| is an undelimited
parameter (it is followed immediately by@|#2| in the definition);
in this case \TeX\ ignores the blank space after |B|, and strips the
braces off@of |{\Look}|. Argument@|#2| will be empty, since |C$| follows
immediately. And argument@|#3| will be the thirteen tokens corresponding
to the text |{And\$|\]|}{look}|, because |#3| is to be followed by `|\$|\]',
and because the first occurrence of `|\$|\]' is within braces. Even though
argument@|#3| begins with a left brace and ends with a right brace, the
braces are not removed, since that would leave the unnested tokens `|And\$
}{look|'. The net effect then, after substituting arguments for
parameters in the replacement text, will be that \TeX\ will next read the
token list
\begintt
{And\$ }{look}{ab\Look}\Look|]c#\x5.
\endtt
The space \] here will be part of the resulting token list, even though
it follows the control word |\Look|, because ↑{spaces} are removed
after ↑{control word} tokens only when \TeX\ first converts input to
token lists as described in Chapter@8.
\ddangerexercise The example definition of\/ |\cs| includes a |##| in
its replacement text, but the way |##| is actually used in that example
is rather pointless. Give an example of a definition where |##| serves
a useful purpose. ↑(sharp sharp)
\answer The |##| feature is indispensable when the replacement text of
a definition contains other definitions. For example, consider
\begintt
\def\a#1{\def\b##1{##1#1}}
\endtt
after which `|\a!|' will expand to `|\def\b#1{#1!}|'. We will see later
that |##| is also important for alignments; see, for example, the definition
of\/ |\matrix| in Appendix@B.
\ddanger A special extension is allowed to these rules: If the very
last character of the \<parameter text> is@|#|, so that this@|#| is
immediately followed by@|{|, \TeX\ will behave as if the@|{| had been
inserted at the right end of both the parameter text and the replacement text.
For example, if you say `|\def\a#1#{\hbox to #1}|', the
subsequent text `|\a3pt{x}|' will expand to `|\hbox to 3pt{x}|', because
the argument of\/ |\a| is delimited by a left brace.
↑(dimensions as arguments)
\ddanger Tokens that precede the first parameter token in the \<parameter
text> of a definition are required to follow the control sequence; in
effect, they become part of the control sequence name. For example, the author
might have said
\begintt
\def\TeX/{...}
\endtt
instead of defining ↑{:TeX} without the slash. Then it would be necessary
to type |\TeX/| each time the \TeX\ logo is desired, but the new definition
would have the advantage that spaces are {\sl not\/} ignored after
`|\TeX/|'. You can use this idea to define macros that are intended to
be used in sentences, so that users don't have to worry about the possible
disappearance of ↑{spaces}.
\ddangerexercise Define a control sequence |\a| such that |\a{...}| expands
to |\b{...}|, and such that \TeX\ gives an error message if\/ |\a| is not
immediately followed by a left brace.
\answer |\def\a#{\b}|.
\ddanger Complicated macros have a habit of behaving differently from what
you expect, when you first define them, even though \TeX's rules are
not especially complicated. If you have trouble understanding why some
|\def| doesn't work the way you think it should, help is available:
You can set |\tracingmacros=1|, whereupon \TeX\ will write something in
your log file whenever it expands a macro, and whenever it has read a
macro argument. For example, if\/ ↑{*tracingmacros} is nonzero when
\TeX\ processes the |\cs| example above, it will put the following
four lines into the log: ↑(debugging macros)
\begintt
\cs AB#1#2C$#3\$ ->#3{ab#1}#1 c##\x #2
#1<-\Look
#2<-
#3<-{And\$ }{look}
\endtt
\ddanger In all of these rules, `|{|' and `|}|' and `|#|' stand for any
characters whose ↑{category codes} are respectively 1, 2, and 6 in the token
list when \TeX\ reads the macro definition; there's nothing sacred about the
particular symbols that plain \TeX\ uses to denote grouping and parameters.
You can even make use of several different characters with these category codes,
all at the same time.
\ddangerexercise Suppose that `|[|', `|]|', and `|!|' have the respective
catcodes 1,@2, and@6, as do `|{|',@`|}|', and@`|#|'. See if you can guess
what the following definition means:
\begintt
\def\!!1#2![{!#]#!!2}
\endtt
What token list will result when `|\! x{[y]][z}|' is expanded?
\answer Let's go slowly on this one, so that the answer will give enough
background to answer all similar questions. The \<parameter text> of the
definition consists of the three tokens |#1|, |#2|, |[|$_1$; the
\<replacement text> consists of the six tokens |{|$_1$, |#|$_6$, |]|$_2$,
|!|$_6$, |#2|, |[|$_1$. \ (When two tokens of category@6 occur in the
replacement text, the character code of the second one survives; the
character code of a category-6 character is otherwise irrelevant. Thus,
`|\def\!#1!2#[{##]!!#2]|' would produce an essentially identical
definition.) \ When expanding the given token list, argument@|#1| is
|x|$_{11}$, since it is undelimited. Argument@|#2| is delimited by@|[|$_1$,
which is different from@|{|$_1$, so it is set provisionally to |{[y]]|;
but the outer ``braces'' are stripped off, so |#2|@reduces to the
three tokens |[|$_1$, |y|$_{11}$,@|]|$_2$. The result of the expansion
is therefore
$$\displaybox{|{|$_1$ |#|$_6$ |]|$_2$ |!|$_6$ |[|$_1$ |y|$_{11}$ |]|$_2$
|[|$_1$ |z|$_{11}$ |}|$_2$.}$$
Incidentally, if you display this with ↑{*tracingmacros}|=1|, \TeX\ says
\begintt
\!!1#2[->{##]!!#2[
#1<-x
#2<-[y]
\endtt
Category codes are not shown, but a character of category@6 always
appears twice in succession. A parameter token in the replacement text
uses the character code of the final parameter in the parameter text.
↑(token lists, as displayed by TeX)
\ddanger In practice, we all make mistakes. And one of the most common
typographic errors is to forget a@`|}|', or to insert an extra@`|{|',
somewhere in an argument to a macro. If \TeX\ were to follow the rules
blindly in such a case, it would have to keep absorbing more and
more tokens in hopes of finding the end of the argument. But a mistyped
argument is unending, like so many arguments in real life (sigh); so
\TeX\ would have to go on until the end of the file, or (more likely)
until tokens completely fill the computer's memory. In either case, a
single typographical error would have ruined the run, and the user would
be forced to start over. Therefore \TeX\ has another rule, intended
to confine such errors to the paragraph in which they occur: {\sl The
token `\thinspace↑{*par}' is not allowed to occur as part of an argument},
unless you explicitly tell \TeX\ that |\par| is OK. Whenever \TeX\ is
about to include |\par| as part of an argument, it will abort the current
macro expansion and report that a ``↑{runaway} argument'' has been found.
\ddanger If you actually want a control sequence to allow arguments with
|\par| tokens, you can define it to be a ``long'' macro by saying
`↑{*long}' just before `|\def|'. For example, the |\bold| macro
defined by
\begintt
\long\def\bold#1{{\bf#1}}
\endtt
is capable of setting several paragraphs in boldface type. \ (However,
such a macro is not an especially good way to typeset bold text.
It would be better to say, e.g.,
\begintt
\def\beginbold{\begingroup\bf}
\def\endbold{\endgroup}
\endtt
because this doesn't fill \TeX's memory with a long argument.)
\ddanger The |\par|-forbidding mechanism doesn't catch all conceivable
missing-brace errors, however; you might forget the |}| at the end of a
|\def|, and the same problem would arise. In this case it's harder to
confine the error, because |\par| is a useful thing in replacement
texts; we wouldn't want to forbid |\par| there, so \TeX\ has another
mechanism: When a macro definition is preceded by `↑{*outer}', the
corresponding control sequence will not be allowed to appear in any place
where tokens are being absorbed at high speed.
An |\outer| macro cannot appear in an argument (not even when |\par| is
allowed), nor can it appear in the parameter text or the replacement text
of a definition, nor in the ↑{preamble} to an alignment, nor in ↑{conditional
text} that is being skipped over. If an |\outer| macro does show up in
such places, \TeX\ stops what it is doing and reports either a
``runaway'' situation or an ``↑{incomplete}'' conditional. The ↑{end of an
input file} is also considered to be |\outer| in this sense; for example, a
file shouldn't end in the middle of a definition. If you are designing a
format for others to use, you can help them detect errors before too much
harm is done, by using |\outer| with all control sequences that should
appear only at ``quiet times'' within a document. For example, Appendix@B
defines ↑{:proclaim} to be |\outer|, since a user shouldn't be stating a
theorem as part of a definition or argument or preamble.
\ddanger We have now seen that |\def| can be preceded by |\long|
or@|\outer|, and it can also be preceded by ↑{*global} if the definition
is supposed to transcend its group. These three prefixes can be applied
to |\def| in any order, and they can even appear more than once.
\TeX\ also has a ↑{*gdef} primitive that is equivalent to |\global\def|.
Thus, for example,
\begintt
\long\global\outer\long\def
\endtt
means the same thing as `|\outer\long\gdef|'.
\ddanger So far in this manual we have encountered several ways to
assign a meaning to a control sequence:
$$\halign{\indent#\hfil&\quad#\hfil\cr
|\font\cs=|\<external font name>&makes |\cs| a font identifier;\cr
|\chardef\cs=|\<number>&makes |\cs| a character code;\cr
|\mathchardef\cs=|\<number>&makes |\cs| a math code;\cr
|\def\cs...{...}|&makes |\cs| a macro.\cr
\noalign{\medskip \hbox{It's time now to reveal another important command
of this type:} \medskip}
|\let\cs=|\<token>&gives |\cs| the token's current meaning.\cr}$$
↑(*let)
If the \<token> is another control sequence, |\cs| will acquire the
same significance as that control sequence. For example, if you say
`|\let\a=\def|', you could then say `|\a\b...{...}|' to define a macro@|\b|,
because |\a| would behave like \TeX's primitive |\def| command.
If you say
\begintt
\let\a=\b \let\b=\c \let\c=\a
\endtt
you have interchanged the former meanings of\/ |\b| and |\c|. And if you say
\begintt
\outer\def\a#1.{#1:}
\let\b=\a
\endtt
the effect is the same as `|\outer\def\b#1.{#1:} \let\a=\b|'.
\ddanger If the \<token> in a |\let| is a single character---i.e.,
if it is a (character code, category code) pair---then
the control sequence will behave to a certain extent like that
character; but there are some differences. For example, after
`|\let\zero=0|' you can't use |\zero| in a numerical constant, because
\TeX\ requires the tokens in a numerical constant to be digits, after
macro expansion; |\zero| is not a macro, so it doesn't expand. However,
such uses of\/ |\let| have their value, as we will see later.
\ddangerexercise Is there a significant difference between `|\let\a=\b|'
and `|\def\a{\b}|'?
\answer Yes indeed. In the first case, |\a| receives the meaning of\/@|\b|
that is current at the time of the |\let|. In the second case, |\a|@becomes
a@macro that will expand into the token@|\b| whenever |\a|@is used,
so it has the meaning of\/@|\b| that is current at the time of use.
You need |\let|, if you want to interchange the meanings of\/ |\a| and@|\b|.
\ddangerexercise Experiment with \TeX\ to discover the answers to the
following questions: (a)@If the control sequence ↑{*par} has been redefined
(e.g., `|\def\par{\endgroup\par}|'), is |\par| still forbidden to appear
in an argument? \ (b)@If you say |\let\xpar=\par|, is |\xpar| also
forbidden in an argument?
\answer (a) Yes. \ (b) No, any other control sequence can appear
(except those declared as |\outer| macros).
\ddanger \TeX\ also allows the construction
`↑{*futurelet}|\cs|\<token$_1$>\<token$_2$>', which has the effect of
`|\let\cs = |\<token$_2$>\<token$_1$>\<token$_2$>'. The idea is that you can
say, for example, `|\futurelet\a\b|' at the end of the replacement text of
a macro; \TeX@will set |\a| to the token that follows the macro, after
which |\b| will be expanded. The control sequence@|\b| can continue the
processing, and it can examine |\a| to see what's coming up next.
↑(looking ahead)
\danger The next thing a person wants, after getting used to macros with
parameters, is the ability to write macros that change their behavior
depending on current conditions. \TeX\ provides a variety of primitive
commands for this purpose. The general form of such ``↑{conditional text}'' is
$$\displaybox{|\if|\<condition>\<true text>|\else|\<false text>|\fi|}$$
where the \<true text> is skipped unless the \<condition> is true, and
the \<false text> is skipped unless the \<condition> is false.
If the \<false text> is empty, you can omit the@↑{*else}. The
`|\if|\<condition>' part of this construction begins with a control
sequence whose first two letters are `|if|'; for example,
\begintt
\ifodd\count0 \rightpage \else\leftpage \fi
\endtt
↑(*ifodd)
specifies a condition that is true when \TeX's integer register ↑{*count}|0|
is odd. Since \TeX\ generally keeps the current ↑{page number} in
|\count0|, the macro |\rightpage| will be expanded in this example if
the page number is odd, while |\leftpage| will be expanded if the
page number is even. Conditional commands always end with a final `↑{*fi}'.
\danger Conditionals are primarily intended for experienced \TeX\ users,
who want to define high-level macros; therefore
the remaining paragraphs in this chapter are headed by ``double
dangerous bends.'' Do not feel guilty about skipping right to Chapter@21;
in other words, imagine that the manual says `|\ifexperienced|' right
here, and that there is a matching `|\fi|' at the end of the present chapter.
\ddanger Before we discuss \TeX's repertoire of\/ |\if...|\ commands, let's
look at another example, so that the general ideas will be clear. Suppose
that the integer register |\count\balance| holds an amount that somebody
has paid in excess of his or her income tax; this amount is given in
pennies, and it might be positive, negative, or zero. Our immediate goal
will be to write a \TeX\ macro that generates a suitable statement for the
↑{Internal Revenue Service} to include as part of a letter to that person,
based on the amount of the balance. The statement will be quite different
for positive balances than for negative ones, so we can exploit \TeX's ability
to act conditionally:
\begintt
\def\statement{\ifnum\count\balance=0 \fullypaid
\else\ifnum\count\balance>0 \overpaid
\else\underpaid
\fi
\fi}
\endtt
Here ↑{*ifnum} is a conditional command that compares two numbers; the
|\statement| macro reduces to |\fullypaid| if the balance is zero, and so on.
\ddanger It is vastly important to notice the spaces after the |0|'s in
this construction. If the example had said
\begintt
...=0\fullypaid...
\endtt
then \TeX\ would have begun to expand `|\fullypaid|' before it knew the
value of the constant |0|, because |\fullypaid| might start with a@|1| or
something that would change the number. \ (After all, `|01|' is a perfectly
acceptable \<number>, in \TeX's eyes.) \ In this particular case the
program would still have worked, because we will see in a moment that
|\fullypaid| begins with the letter@|Y|; thus, the only problem caused by
the missing space would be that \TeX\ would go slower, since it would have
to skip over the whole expansion of\/ |\fullypaid| instead of just skipping
|\fullypaid| as a single, unexpanded token. But in other situations a
missing space like this might cause \TeX\ to expand macros when you don't
want any expansion, and such anomalies can cause subtle and confusing errors.
For best results, {\sl always put a blank space after a numeric constant\/};
this blank space tells \TeX\ that the constant is complete, and
such a space will never ``get through'' to the output. In fact, when you
don't have a blank ↑{space after a constant}, \TeX\ actually has to do
more work, because each constant continues until a non-digit has been
read; if this non-digit is not a space, \TeX\ takes the token you did have
and backs it up, ready to be read again. \ (On the other hand, the author
often omits the space when a constant is immediately followed by some
other character, because extra spaces do look funny in the file;
aesthetics are more important than@efficiency.)
\ddangerexercise Continuing the IRS example, assume that |\fullypaid|
and |\underpaid| are defined as follows:
\begintt
\def\fullypaid{Your taxes are fully paid---thank you.}
\def\underpaid{{\setcount0=-\count\balance
\ifnum\count0<100
You owe \dollaramount, but you need not pay it, because
our policy is to disregard amounts less than \$1.00.
\else Please remit \dollaramount\ within ten days,
or additional interest charges will be due.\fi}}
\endtt
Write a macro |\overpaid| to go with these, assuming that |\dollaramount|
is a macro that generates the contents of\/ |\count0| in dollars and cents.
Your macro should say that a check will be mailed under separate cover,
unless the amount is less than \$1.00, in which case the person must
specifically request a check.
\answer |\def\overpaid{{\setcount0=\count\balance|\par
| You have overpaid your tax by \dollaramount.|\par
| \ifnum\count0<100 It is our policy to refund|\par
| such a small amount only if you ask for it.|\par
| \else A check for this amount is being mailed|\par
| under separate cover.\fi}}|
\ddanger Now let's make a complete survey of \TeX's conditional commands.
Some of them involve features that have not yet been introduced in this
manual.\enddanger
\nobreak\medskip
\item\bull↑{*ifnum}\<number$_1$>\<relation>\<number$_2$>\quad
(compare two integers)
\nobreak\smallskip\noindent
The ↑{<relation} must be either `|<|' or `|=|' or `|>|'. The two integer
numbers are compared to each other in the usual way, and the result is
true or false accordingly.
\medbreak
\item\bull↑{*ifdim}\<dimen$_1$>\<relation>\<dimen$_2$>\quad
(compare two dimensions)
\nobreak\smallskip\noindent
This is like |\ifnum|, but it compares two \<dimen> values. For example,
to test whether the value of\/ |\hsize| exceeds $100\pt$, you can say
`|\ifdim\the\hsize>100pt|'.
\medbreak
\item\bull↑{*ifodd}\<number>\quad(test for odd integer)
\nobreak\smallskip\noindent
The condition is true if the \<number> is odd, false if it is even.
\medbreak
\item\bull↑{*ifvmode}\quad(test for vertical mode)
\nobreak\smallskip\noindent
True if \TeX\ is in vertical mode or internal vertical mode (see Chapter@13).
\medbreak
\item\bull↑{*ifhmode}\quad(test for horizontal mode)
\nobreak\smallskip\noindent
True if \TeX\ is in horizontal mode or restricted horizontal mode
(see Chapter@13).
\medbreak
\item\bull↑{*ifmmode}\quad(test for math mode)
\nobreak\smallskip\noindent
True if \TeX\ is in math mode or display math mode (see Chapter@13).
\medbreak
\item\bull↑{*ifinner}\quad(test for an internal mode)
\nobreak\smallskip\noindent
True if \TeX\ is in internal vertical mode, or restricted
horizontal mode, or (nondisplay) math mode (see Chapter@13).
\medbreak
\item\bull↑{*if}\<token$_1$>\<token$_2$>\quad(test if character codes agree)
\nobreak\smallskip\noindent
\TeX\ will expand macros following |\if| until two unexpandable tokens are
found. If either token is a control sequence, \TeX\ considers it to have
character code@256 and category code@16, unless the current equivalent of
that control sequence has been |\let| equal to a non-active character
token. In this way, each token specifies a (character@code,
\hbox{category}@code) pair. The condition is true if the character codes
are equal, independent of the category codes. For example, after
|\def\a{*}| and |\let\b=*| and |\def\c{/}|, the tests `|\if*\a|' and
`|\if\a\b|' will be true, but `|\if\a\c|' will be false. Also
`|\if\a\par|' will be false, but `|\if\par\let|' will be true. % Beresford=true
\medbreak
\item\bull↑{*ifcat}\<token$_1$>\<token$_2$>\quad(test if category codes agree)
\nobreak\smallskip\noindent
This is just like |\if|, but it tests the ↑{category codes}, not the character
codes.
\medbreak
\item\bull↑{*ifx}\<token$_1$>\<token$_2$>\quad(test if tokens agree)
\nobreak\smallskip\noindent
In this case, \TeX\ does {\sl not\/} expand control sequences when it looks
at the two tokens. The condition is true if (a)@the two tokens are not macros,
and they both represent the same (character code, category code) pair or the
same \TeX\ primitive or the same ↑{*chardef} or ↑{*mathchardef} or ↑{*font};
or if (b)@the two tokens are macros, and they both have the same status with
respect to ↑{*long} and ↑{*outer}, and they both have the same ``top level''
expansion. For example, after `|\def\a{\c}| |\def\b{\d}| |\def\c{\e}|
|\def\d{\e}| |\def\e{A}|', an |\ifx| test will find |\c| and |\d| equal,
but not |\a| and@|\b|, nor |\d| and@|\e|, nor any other combinations of
|\a|, |\b|, |\c|, |\d|, |\e|.
\medbreak
\item\bull↑{*ifvoid}\<number>\quad(test if a box register is void)
\nobreak\smallskip\noindent
The \<number> should be between 0 and 255. The condition is true if
that |\box| is void (see Chapter@15).
\medbreak
\item\bull↑{*ifeof}\<number>\quad(test for end of file)
\nobreak\smallskip\noindent
The \<number> should be between 0 and 15. The condition is true unless the
corresponding input stream is open and not fully read. \ (See the command
↑{*openin} below.)
\medbreak
\item\bull↑{*iftrue}, ↑{*iffalse}\quad(always true or always false)
\nobreak\smallskip\noindent
These conditions have a predetermined outcome. But they turn out to be
useful in spite of this, as explained below.
\medbreak
Finally, there's one more conditional construction, which is somewhat different
from the rest because it is capable of making a many-way branch:
$$\halign{\indent#\hfil\cr
|\ifcase|\<number>\<text for case 0>|\or|\<text for case 1>|\or|$\;\cdots$\cr
| \or|\<text for case $n$>|\else|\<text for all other cases>|\fi|\cr}$$
↑(*ifcase)
Here there are $n+1$ cases separated by $n$ ↑{*or}'s, where $n$ can be any
nonnegative number. Once again the ↑{*else} part is optional, if you
don't want to specify any text for cases when the \<number> is negative
or greater than@$n$.
\ddangerexercise Design a |\category| macro that prints a character's current
category code symbolically, given a one-character control sequence for
that character. For example, if the category codes of plain \TeX\ are in
force, `|\category\\|' should expand to `|escape|', and `|\category\a|'
should expand to `|letter|'.
\answer |\def\category#1{\ifcase\the\catcode`#1|\par
| escape\or begingroup\or endgroup\or math\or|\par
| align\or endline\or parameter\or superscript\or|\par
| subscript\or ignored\or space\or letter\or|\par
| otherchar\or active\or comment\or invalid\fi}|\par
\ddangerexercise Test yourself on the following questions to see if you
understand certain borderline situations: After the definitions
`|\def\a{}| |\def\b{**}| |\def\c{True}|',
which of the following conditions are true?
(a)@`|\if\a\b|';
(b)@`|\ifcat\a\b|';
(c)@`|\ifx\a\b|';
(d)@`|\if\c|';
(e)@`|\ifcat\c|';
(f)@`|\ifx\ifx\ifx|'.
\answer (a,b)@True. (c,d)@False. (e,f)@True. In case@(e), the \<true text>
starts with `|ue|'.
\ddangerexercise Continuing the previous exercise, is the following
condition true or false?
\begintt
\if\ifx\a\b\c\else\if\a\b\c\fi\fi
\endtt
\answer The |\ifx| is false and the inner |\if| is true; so
the outer |\if| becomes `|\if True...|', which is false. (Interestingly,
\TeX\ knows that the outer |\if| is false even before it has looked at
the |\fi|'s that close the |\ifx| and the inner |\if|.)
\ddanger Notice that all of the control sequences for conditionals
begin with |\if...|, and they all have a matching@|\fi|. This convention---that
|\if...|\ pairs up with |\fi|---makes it easier to see the nesting
of conditionals within your program. The nesting of\/ |\if...\fi| is
independent of the nesting of |{...}|; thus, you can begin or end a ↑{group}
in the middle of a conditional, and you can begin or end a conditional
in the middle of a group. Extensive experience with macros has shown that
such independence is important in applications; but it can also lead to
confusion if you aren't careful.
\ddanger It's sometimes desirable to pass information from one macro to
another, and there are several ways to do this: ↑(communication between macros)
by passing it as an argument, by putting it into a register, or by
defining a control sequence that contains the information. For example,
when the author first defined the macros |\hphantom|, |\vphantom|, and
↑{:phantom} in Appendix@B\null, he wanted to do most of the work in other macros
|\phant| and |\finphant| that would be common to all three; somehow |\finphant|
was to be told what kind of phantom was desired. The first approach was to
define control sequences |\hph| and |\vph| something like this:
\begintt
\def\hphantom{\ph YN} \def\vphantom{\ph NY} \def\phantom{\ph YY}
\def\ph#1#2{\def\hph{#1}\def\vph{#2}\phant}
\endtt
after which |\finphant| could test `|\if Y\hph|' and `|\if Y\vph|'. This
worked, but there were various ways to make it more efficient; for example,
`|\def\hph{#1}|' could be replaced by `|\let\hph=#1|', avoiding macro expansion.
An even better idea then suggested itself:
\begintt
\def\yes{\if00} \def\no{\if01}
\def\hphantom{\ph\yes\no}...\def\phantom{\ph\yes\yes}
\def\ph#1#2{\let\ifhph=#1\let\ifvph=#2\phant}
\endtt
after which |\finphant| could test `|\ifhph|' and `|\ifvph|'. \ (This
construction was tried before |\iftrue| and |\iffalse| were part of the
\TeX\ language.) \ The idea worked fine, so the author started to use
|\yes| and |\no| in a variety of other situations. But then one day a
complex conditional failed, because it contained an |\ifhph|-like test
inside another conditional:
\begintt
\if... \ifhph...\fi ... \else ... \fi
\endtt
Do you see the problem that developed? When the \<true text> of the
outermost conditional was executed, everything worked fine, because
|\ifhph| was either |\yes| or |\no| and it expanded into either |\if00| or
|\if01|. But when the \<true text> was skipped, the |\ifhph| was not
expanded, so the first |\fi| was mistakenly paired with the first |\if|;
everything soon went haywire. That's when ↑{*iftrue} and ↑{*iffalse}
were put into the language, in place of\/ |\yes| and |\no|; now
|\ifhph| is either |\iftrue| or |\iffalse|, so \TeX\ will match it
properly with a closing@|\fi|, whether or not it is being skipped over.
\ddanger To facilitate |\if...|@constructions, plain \TeX\ has a
↑{:newswitch} macro, such that after you say `|\newswitch{abc}|' three
control sequences will be defined: |\ifabc| (for testing the switch),
|\abctrue| (for making the switch true), and |\abcfalse| (for making
it false). The |\phantom| problem is now solved in Appendix@B by writing
\begintt
\newswitch{hph} \newswitch{vph}
\def\hphantom{\hphtrue\vphfalse\phant}
\endtt
and with similar definitions of\/ |\vphantom| and |\phantom|. There is no
longer any need for a |\ph| macro; again |\finphant| tests |\ifhph| and
|\ifvph|. Appendix@E contains other examples of switches created
by@|\newswitch|.
\ddanger Caution: Don't say anything like `|\let\ifabc=\iftrue|' in
conditional text. If \TeX\ skips over this command, it will think that
the |\iftrue| requires a matching |\fi|, since the |\let| is not being
executed! Keep such commands buried inside macros, so that \TeX\ will
see the `|\if...|' only when it is not skipping over the text that it
is reading.
\ddanger Everyone who makes extensive use of a powerful macro facility
encounters situations when the macros do surprising things. We have
already mentioned the possibility of setting |\tracingmacros=1|, in order
to see when \TeX\ expands macros and what arguments it finds. There's
also another helpful way to watch what \TeX\ is doing: If you set
↑{*tracingcommands}|=1|, \TeX\ will show every command that it executes.
Furthermore, if you set \hbox{|\tracingcommands=2|}, \TeX\ will show all
conditional commands and their outcomes, as well as the unconditional
commands that are actually performed. This diagnostic information
goes into your log file. You can also see it on your terminal,
if you say ↑{*tracingonline}|=1|.
\ddanger One way to understand the occasional strangeness of macro
operation is to use the tracing features just described, so that you can
watch what \TeX\ does in slow motion. Another way is to learn the rules for
how macros are expanded; we shall now discuss those rules.
\ddanger \TeX's mastication process converts your input to a long token
list, as explained in Chapter@8; and its digestive processes work
strictly on this token list. When \TeX\ encounters a control sequence in
the token list, it looks up the current meaning, and in certain cases
it will expand that token into a sequence of other tokens before
continuing to read. The expansion process applies to macros and to
certain other special primitives like |\number| and |\if| that we shall
consider momentarily. Sometimes, however, the ↑{expansion} is not carried out;
for example, when \TeX\ is taking care of a |\def|, the \<control sequence>,
the \<parameter text>, and the \<replacement text> of that |\def| are
not subject to expansion. Similarly, the two tokens after |\ifx| are
never expanded. A complete list of occasions when tokens are not expanded
appears on the next page; you can use it for reference in an emergency.
\ddanger Now let's consider the control sequences that are expanded whenever
expansion has not been inhibited. Such control sequences
fall into several classes:\enddanger
\nobreak\medskip
\textindent\bull Macros. When a macro is expanded, \TeX\ first determines its
arguments (if any), as explained earlier in this chapter. Each argument
is a token list; the tokens are not expanded when they are being
accepted as arguments. Then \TeX\ replaces the macro and its arguments
by the replacement text.
\smallbreak
\textindent\bull Conditionals. When an |\if...| is expanded, \TeX\ reads
ahead as far as necessary to determine whether the condition is true or
false; and if false, it skips ahead (keeping track of\/ |\if...\fi| nesting)
until finding the |\else|, |\or|, or@|\fi| that ends the skipped text.
Similarly, when |\else|, |\or|, or@|\fi| is expanded, \TeX\ reads to the
end of any text that ought to be skipped. The ``expansion'' of a conditional
is empty. \ (Conditionals always reduce the number of tokens that are seen by
later stages of the digestive process, while macros usually increase the
number of tokens.)
\smallbreak
\textindent\bull ↑{*number}\<number>. When \TeX\ expands |\number|, it reads
the \<number> that follows (expanding tokens as it goes); the final
expansion consists of the ↑{decimal representation} of that number,
preceded by `|-|' if negative.
\smallbreak
\textindent\bull ↑{*romannumeral}\<number>. This is like |\number|, but
the expansion consists of lower-case roman numerals. For example,
`|\romannumeral 1982|' produces `|mcmlxxxii|'. The expansion is
empty if the number is zero or negative.
\smallbreak
\textindent\bull ↑{*string}\<token>. \TeX\ first reads the \<token> without
expansion. If a control sequence token appears, its |\string| expansion
consists of the control sequence name (including |\| as an escape
character, if the control sequence isn't simply an active character).
Otherwise the \<token> is a character token, and its character code
is retained as the expanded result.
\smallbreak
\textindent\bull ↑{*jobname}. The expansion is the name that \TeX\ has chosen
for this job. For example, if \TeX\ is putting its output on files
|paper.dvi| and |paper.log|, ↑(.dvi) then |\jobname| expands to `|paper|'.
\smallbreak
\textindent\bull ↑{*csname}|...|↑{*endcsname}. When \TeX\ expands |\csname|
it reads to the matching |\endcsname|, expanding tokens as it goes;
only character tokens should remain after this expansion has taken place.
Then the ``expansion'' of the entire |\csname...\endcsname| text
will be a single control sequence token, defined to be like |\relax| if
it has not previously occurred.
\smallbreak
\textindent\bull ↑{*expandafter}\<token>. \TeX\ first reads the token that
comes immediately after |\expandafter|, without expanding it;
let's call this token@$t$. Then \TeX\ reads the token that comes after@$t$
(and possibly more tokens, if that token has an argument), replacing it by
its expansion. Finally \TeX\ puts@$t$ back in front of that expansion.
\smallbreak
\textindent\bull ↑{*input}\<file name>. The expansion is null; but \TeX\
prepares to read from the specified file before looking at any more
tokens from its current source.
\smallbreak
\textindent\bull ↑{*endinput}. The expansion is null; \TeX\ prepares to stop
reading from whatever input file next gets to the end of an input line.
\smallbreak
\textindent\bull ↑{*topmark}, ↑{*firstmark}, ↑{*botmark}, ↑{*splitfirstmark},
and ↑{*splitbotmark}. \kern-1.7pt % This saves an overfull box (March 27, 1983)
The expansion is the token list in the corresponding ``↑{mark}'' register
(see Chapter@23).
\ddanger Here is the promised list of all cases when tokens are not
expanded. Some of the situations involve primitives of \TeX\ that have
not yet been discussed in this manual, but we'll get to them eventually.
Expansion is suppressed\enddanger
\nobreak\medskip
\item\bull When tokens are being deleted during ↑{error recovery} (see
Chapter@6).
\smallskip
\item\bull When tokens are being skipped because conditional text is being
ignored.
\smallskip
\item\bull When \TeX\ is reading the arguments of a macro.
\smallskip
\item\bull When \TeX\ is reading the control sequence that is to be defined by
↑{*let}, ↑{*futurelet}, ↑{*def}, ↑{*gdef}, ↑{*edef}, ↑{*xdef}, ↑{*chardef},
↑{*mathchardef}, ↑{*read}, and ↑{*font}.
\smallskip
\item\bull When \TeX\ is reading the argument tokens for the primitives
↑{*expandafter}, ↑{*let}, ↑{*futurelet}, ↑{*string}, ↑{*ifx}, and ↑{*show}.
\smallskip
\item\bull When \TeX\ is absorbing the parameter text of a |\def|, |\gdef|,
|\edef|, or |\xdef|.
\smallskip
\item\bull When \TeX\ is absorbing the replacement text of a |\def| or
|\gdef| or ↑{*read}; or the text of a ↑{token parameter} like ↑{*everypar}
or ↑{*output}; or the token list for ↑{*uppercase} or ↑{*lowercase} or
↑{*write}. \ (The token list for |\write| will be expanded later, when it
is actually output to a file.)
\smallskip
\item\bull Just after a token for the primitive commands |\def| or |\gdef| or
|\edef| or |\xdef|, when such a token occurs in the replacement text of
an |\edef| or |\xdef|; or when such a token occurs in the token list for
↑{*mark}, ↑{*message}, ↑{*errmessage}, or ↑{*special}; or when such a token
occurs in a token list for |\write| that is being output to a file.
\smallskip
\item\bull When \TeX\ is reading the preamble of an alignment, except after
a token for the primitive command |\span| or when reading the \<glue>
after ↑{*tabskip}.
\smallskip
\item\bull When \TeX\ is looking for an optional space following an alphabetic
constant, a ↑{<dimen}, the keyword `↑{.true}' in a \<dimen>, or the
|$$| that ends a display. ↑(dollar dollar)
\smallskip
\item\bull Just after a |$| that begins math mode, to see if
another@|$| follows.
\ddanger Sometimes you will find yourself wanting to define new macros
whose replacement text has been expanded, based on current conditions,
instead of simply copying the replacement text verbatim. \TeX\ provides
the ↑{*edef} (expanded definition) command for this purpose, and also
↑{*xdef} (which is equivalent to |\global\edef|). The general format
is the same as for |\def| and |\gdef|, but \TeX\ expands the tokens
of the replacement text according to the rules above. For example, consider
\begintt
\def\double#1{#1#1}
\edef\a{\double{xy}}
\edef\a{\double\a}
\endtt
Here the first |\edef| is equivalent to `|\def\a{xyxy}|' and the second is
equivalent to `|\def\a{xyxyxyxy}|'. All of the other kinds of expansion
will take place too, including conditionals; for example,
\begintt
\edef\b#1#2{\ifmmode#1\else#2\fi}
\endtt
gives a result equivalent to `|\def\b#1#2{#1}|' if \TeX\ is in math
mode at the time of the |\edef|, otherwise the result is equivalent to
`|\def\b#1#2{#2}|'.
\ddanger Expanded definitions that are made with |\edef| or |\xdef| will
also expand tokens in another way, not mentioned above: If a token
for the primitive command `↑{*the}' occurs, it will be expanded (together
with one or more tokens that follow) into the current value of one of
\TeX's internal registers or parameters. This is a powerful way to convert
\TeX's internal information into text that can be used in later commands
(or even in the printed document). For example, `|\the\tolerance|'
might expand into `|200|'; `|\the\skip5|' might expand into
`|5.0pt plus 2.0fil|'. You can use |\the| with a wide variety of
different things:\enddanger
\nobreak\medskip
\textindent\bull |\the|\<parameter>, where \<parameter> is the name of
one of \TeX's ↑{integer parameters} (e.g., |\the\widowpenalty|),
↑{dimension parameters} (e.g., |\the\parindent|), ↑{glue parameters}
(e.g., |\the\leftskip|), ↑{muglue parameters} (e.g., |\the\thinmuskip|),
or ↑{token parameters} (e.g., |\the\everypar|).
\smallbreak
\textindent\bull |\the|\<register>, where \<register> is the name of
one of \TeX's integer ↑{registers} (e.g., |\the\count|\stretch|0|),
dimension registers (e.g., |\the\dimen169|), glue registers (e.g.,
|\the\skip255|), or muglue registers (e.g., |\the\muskip\count2|).
\smallbreak
\textindent\bull |\the|\<codename>\<character number>, where \<codename>
is either ↑{*catcode}, ↑{*mathcode}, ↑{*lccode}, ↑{*uccode}, ↑{*sfcode},
or ↑{*delcode}. For example, |\the\mathcode`/| produces the current
(integer) math code value for a slash.
\smallbreak
\textindent\bull |\the|\<special register>, where \<special register> is
one of the integer quantities ↑{*prevgraf}, ↑{*deadcycles}, ↑{*insertpenalties},
or ↑{*parshape} (only the number of lines of\/ |\parshape| is given); or
one of the dimensions ↑{*pagegoal}, ↑{*pagetotal}, ↑{*pagestretch},
↑{*pagefilstretch}, ↑{*pagefillstretch}, ↑{*pagefilllstretch}, ↑{*pageshrink},
or ↑{*pagedepth}. In horizontal modes you can also get a special integer,
|\the\spacefactor|; in vertical modes you can also get a special dimension,
|\the\prevdepth|.
\smallbreak
\textindent\bull |\the|\<defined character>, where \<defined character> is
a control sequence that has been given an integer value with ↑{*chardef} or
↑{*mathchardef}; the result is that integer value.
\smallbreak
\textindent\bull |\the|\<font identifier> gives the external font file name
for that identifier; e.g., `|\the\tenrm|' yields `↑{.cmr10}', a sequence of
five character tokens.
\smallbreak
\textindent\bull |\the|↑{*textfont}\<number>, where the \<number> is between
0 and@15, gives the font identifier for the text font in the stated family.
For example, `|\the\textfont0|' yields the control sequence token `|\tenrm|'.
You can also say |\the|↑{*scriptfont}\<number>
and |\the|↑{*scriptscriptfont}\<number> to get the other members of a family.
The ↑{current font identifier} is `|\the|↑{*font}'.
\smallbreak
\textindent\bull |\the|↑{*texinfo}\<font identifier>\<parameter number>.
This produces a dimension; for example, parameter@6 of a font is its
``↑{em}'' value, so `|\the\texinfo\tenrm6|' yields `|10.0pt|'.
\smallbreak
\textindent\bull |\the|↑{*lastskip}. This yields the amount of glue or
muglue in the final item on the current list, or zero if that item
is not a glue item.
\ddanger Whenever |\the| produces a result that is a sequence of character
tokens---for example, integers, dimensions, glue, and font names are
converted to ↑{ascii} characters---the category code@12 (``other'') is
assigned to each token, except that character code@32 gets category@10
(``space''). The same rule is used to assign ↑{category codes} to the
tokens produced by ↑{*number}, ↑{*string}, ↑{*romannumeral}, and ↑{*jobname}.
\ddanger \TeX's primitive command `↑{*showthe}' will display on your
terminal exactly what `|\the|' would produce in an expanded definition;
the expansion is preceded by `|> |' and followed by a period. For example,
`|\showthe\parindent|' will display
\begintt
> 20.0pt.
\endtt
if the plain \TeX\ paragraph indentation is being used.
\ddanger You can also use ↑{*minusthe} instead of\/ |\the|, to get the
negative of the stated value. However, |\minusthe| is not allowed with
nonnumeric quantities (token lists, font identifiers, or external
file names); it makes sense only with integers, dimensions, glue, and
muglue. For example, if the |\parfillskip| is $0\pt$ plus $1\,$fil, then
`|\minusthe\parfillskip|' expands to `|0.0pt| |plus| |-1.0fil|'.
\ddanger In case you suffer from stuttering, \TeX\ treats both
|\minusthe\minusthe| and |\the\the| as equivalent to |\the|;
similarly, |\minusthe\the| and |\the\minusthe| are equivalent to |\minusthe|.
\ddanger Exception: A token is not expanded in ↑{*edef} or ↑{*xdef} when
it immediately follows a |\def|, |\gdef|, |\edef|, or |\xdef|. Thus,
if you say
\begintt
\mag=2000 \edef\a{\def\b{\the\mag}}
\endtt
the result is equivalent to `|\def\a{\def\b{2000}}|'.
\ddanger If for some reason you do want expansion after |\def|, you can
obtain it in a tricky way like this:
\begintt
\let\Def=\def \let\def=\relax \edef\a{\def...} \let\def=\Def
\endtt
since expansion is done after |\relax| and all other primitives. \ (You
can change the meaning of any control sequence.) \ Even better would be to say
`↑{*let}|\def=0|' instead of `|\let\def=\relax|', since control sequences
equivalent to characters are not expanded; this second alternative works
even when ↑{*relax} has been redefined.
\ddanger What if you don't want to expand a control sequence? There are
several ways to ↑{avoid expansion}. Suppose, for example, that you
want to define |\a| to be equal to@|\b| (expanded) followed by@|\c|
(not expanded) followed by |\d| (expanded), assuming that |\b| and
|\d| are simple macros without parameters. You can say
\begintt
\let\save=\c \let\c=0 \edef\a{\b\c\d} \let\c=\save
\endtt
as in the previous example. However, this doesn't expand occurrences of@|\c|
that might be present in the expansions of\/ |\b| and@|\d|. Another way,
which is free of this defect, is
\begintt
\edef\next#1{\def\a{\b#1\d}} \next\c
\endtt
(look at this closely, to make sure that you understand why it works). Or
you can say
\begintt
\tokens{\c} \edef\a{\b\the\tokens\d}
\endtt
which uses a ↑{token list parameter} ↑{*tokens} that has been provided
specifically for controlling expansions. When \TeX\ expands `|\the|\<token list
parameter>', it just substitutes the token list without expanding it further.
\ddanger The token list parameters are ↑{*output}, ↑{*everypar},
↑{*everyhbox}, ↑{*everyvbox}, ↑{*everymath}, ↑{*everydisplay},
↑{*everyjob}, and |\tokens|. To define them, say
$$\displaybox{\<token list parameter>|{|\<replacement text>|}|}$$
and the \<replacement text> will be treated almost as if you were using
|\def| without any parameters. However, if you want to put the ↑{macro
parameter character} |#| ↑(sharp) into the replacement text for some
reason, you should type it only once (not twice as with |\def|).
When |\output| is being defined, \TeX\ puts group delimiters `|{|' and
`|}|' before and after the token list, in order to make the
|\output| routine a ↑{group}; such delimiters are not tacked on to the
other token list parameters, only to |\output|.
\ddanger \TeX's primitive commands ↑{*mark}|{...}|, ↑{*message}|{...}|,
↑{*errmessage}|{...}|, ↑{*special}|{...}|, and ↑{*write}\<number>|{...}|
all expand the token lists in braces almost exactly as |\edef| and |\xdef| do;
i.e., |\the| and |\minusthe| are expanded as well as all other tokens,
except following |\def| and its cousins. However, a macro parameter
character like@|#| should not be duplicated in such commands; you need
to say |##| within an |\edef|, but only |#| within a |\mark|. The
|\write| command is somewhat special, because its token list is first
read without expansion; expansion occurs later, when the tokens
are actually being written to a file.
\ddangerexercise Continuing the example of ↑{expansion avoidance}, suppose that
you want to expand |\b| completely until only unexpandable tokens are left,
but you don't want to expand |\c| at all, and you want to expand |\d|
only one level. For example, after |\def\b{\c\c}| and |\def\c{*}| and
|\def\d{\b\c}| the goal would be to get the effect of\/ |\def\a{**\c\b\c}|.
How can such a partial expansion be achieved?
\answer The first solution uses ↑{*everyjob}, since |\everyjob| is irrelevant
after the job has started. Notice the ↑{*expandafter} just before `|{|':
\begintt
\everyjob{\c} \tokens\expandafter{\d}
\edef\a{\b\the\everyjob\the\tokens}
\endtt
The second solution avoids |\everyjob|:
\begintt
\tokens\expandafter{\d}
\edef\next#1{\def\a{\b#1\the\tokens}}\next\c
\endtt
And here's a third solution (please don't take it too seriously, but it
does work):
\begintt
{\setbox0=\vbox{\halign{#{\c\span\d}\cr
\edef\next#1{\gdef\next{\b#1}}\next\cr}}}
\let\a=\next
\endtt
\ddangerexercise Write a |\dollaramount| macro, to complete the Internal
Revenue |\statement| example that appeared earlier in this chapter.
\answer The tricky part is to get the zeroes, in amounts like `|$0.01|'.
↑(leading zeros) ↑(dollar amounts from count registers)
\begintt
\def\dollaramount{\setcount1=\count0 \divcount1 by100
\$\number\count1.%
\multcount1 by-100 \advcount1 by\count0
\advcount1 by100 \def\next1##1{##1}%
\expandafter\next\number\count1 }
\endtt
\ddangerexercise Compare the following two definitions:
\begintt
\def\a{\iftrue{\else}\fi}
\edef\b{\iftrue{\else}\fi}
\endtt
Which of them yields an ↑{unmatched left brace}? (This is tricky.)
\answer Neither one, although |\a| will behave like an unmatched left
brace when it is expanded. The definition of\/ |\b| is {\sl not complete},
because it expands to `|\def\b{{}|'; \TeX\ will continue to read ahead,
looking for another right brace, possibly discovering a runaway
definition! It's impossible to define a macro that has unmatched braces.
But you {\sl can\/} say |\let\a={|; Appendix@D discusses several
other ↑{brace tricks}.
\ddanger \TeX\ has the ability to read individual lines of text from up
to@16 files at once, in addition to the files that are being |\input|.
To initiate reading such an auxiliary file, you should say
$$\displaybox{|\openin|\<number>=\<file name>}$$
↑(*openin)
where the \<number> is between 0 and 15. \ (Plain \TeX\ allocates
input stream numbers 0@through@15 with the ↑{:newread} command, which
is analogous to |\newbox|.) \ In most installations
of \TeX, the extension `↑{..tex}' will be appended to the file name,
as with ↑{*input}, if no extension is given explicitly. If the file
cannot be found, \TeX\ will give no error message; it will simply consider
that the input stream is not open, and you can test this condition
with ↑{*ifeof}. When you're done with a file, you can say
$$\displaybox{|\closein|\<number>}$$
↑(*closein)
and the file associated with that input stream number will be closed,
i.e., returned to its initial condition, if such a file was open.
To get input from an open file, you say
$$\displaybox{|\read|\<number>|to|\<control sequence>}$$
↑(*read) ↑(.to)
and the control sequence is defined to be a parameterless macro whose
replacement text is the contents of the next line read from the
designated file. This line is converted to a token list, using the
procedure of Chapter@8, based on the current category codes.
Additional lines are read, if necessary, until an equal number of
left and right braces has been found. If the
\<number> is not between 0 and@15, or if no such file is open, or if
the file has ended, the user will be prompted to type a line on the
terminal, and this line will be used instead. The macro definition will be
local unless you say |\global\read|.
\ddanger For example, it's easy to have ↑{dialogs with the user}, by
using |\read| together with the ↑{*message} command (which
writes an expanded token list on the terminal and in the log file):
\begintt
\message{Please type your name:}
\read-1 to\myname
\message{Hello, \myname!}
\endtt
The |\read| command in this case will print `|\myname=|' and it will wait
for a response; the response will be echoed on the log file.
\ddangerexercise The |\myname| example just given doesn't work quite right,
because the \<return> at the end of the line gets translated into a
space. Figure out how to fix that glitch.
\answer One way is to redefine |\catcode`\↑↑M=9| (ignored) just before
the |\read|, so that the \<return> will be ignored. Another solution is
to strip off the space as follows:
\begintt
\def\stripspace#1 \next{#1}
\edef\myname{\expandafter\stripspace\myname\next}
\endtt
The second solution doesn't work if the user types `|%|' at the end of
his@or her name, or if the name contains control sequences.
\ddangerexercise Continuing the previous example, define a macro
|\MYNAME| that contains the letters of\/ |\myname| all in ↑{upper-case
letters}. For example, if\/ |\myname| expands to |Arthur|, |\MYNAME|
should expand to |ARTHUR|. Assume that |\myname| contains only
letters and spaces in its expansion.
\answer |\edef\next{\def\MYNAME{\myname}}| ↑{*uppercase}|\expandafter{\next}|
\ddanger Appendices B, D, and E contain numerous examples of how to make
macros do useful things. Let's close this chapter by presenting a few
examples that show how \TeX\ can actually be used as a primitive ↑{programming}
language, if you want to achieve special effects, and if you don't care very
much about computer costs.
\ddanger Plain \TeX\ contains a |\loop...\repeat| construction, which works
like this: You say `↑{:loop}@$\alpha$@|\if...|@$\beta$@|\repeat|', where
$\alpha$ and@$\beta$ are any sequences of commands, and where |\if...|\
is any conditional test (without a matching |\fi|). \TeX\ will first
do@$\alpha$; then if the condition is true, \TeX\ will do@$\beta$ and
repeat the whole process again starting with@$\alpha$. Finally, if the
condition ever turns out to be false, the loop will stop. For example,
here is a program that carries out a little dialog in which \TeX\ waits for
the user to type `|Yes|' or `|No|': ↑(repeating commands, see :loop)
\begintt
\def\yes{Yes } \def\no{No } \newswitch{garbage}
\loop\message{Are you happy?}
\read-1 to\answer
\ifx\answer\yes\garbagefalse % the answer is Yes
\else\ifx\answer\no\garbagefalse % the answer is No
\else\garbagetrue\fi\fi % the answer is garbage
\ifgarbage\message{(Please type Yes or No.)}
\repeat
\endtt
\ddangerexercise Use the |\loop...\repeat| mechanism to construct a
general |\punishment| macro that repeats any given paragraph any given number of
times. For example,
\begintt
\punishment{I must not talk in class.}{100}
\endtt
should produce the results desired in exercise 20.\punishexno.
\answer (Here's a solution that also numbers the lines, so that the number of
repetitions is easily verifiable.
The only tricky part about this answer is the use of\/ ↑{*endgraf},
which is a substitute for |\par| because |\loop| is not a ↑{*long} macro.)
\begintt
\newcount\n
\def\punishment#1#2{\setcount\n=0
\loop\ifnum\count\n<#2 \advcount\n by1
\item{\number\count\n.}#1\endgraf\repeat}
\endtt
% Live (non-displayed) definitions of the prime-number macros. They are
% executed to typeset `\primes{30}' in the paragraph that follows, and the
% same code is shown to the reader in the \halign listing after that
% paragraph, so the two copies must be kept in step with each other.
% Switches tested below via \ifprime and \ifunknown; counters n, p, d.
\newswitch{prime} \newswitch{unknown}
\newcount\n \newcount\p \newcount\d
% \primes{#1}: typesets `2, 3' and then keeps testing odd candidates p,
% starting at 5, until the count n of values still to be printed reaches 0.
\def\primes#1{2,@3% assume that #1 is at least 2
\setcount\n=#1 \advcount\n by-2 % n more to go
\setcount\p=5 % odd primes starting with p
\loop\ifnum\count\n>0 \printifprime\advcount\p by2 \repeat}
% \printp: typesets a comma and space, the word `and' when exactly one value
% remains, and then the value of p; finally decreases n by one.
\def\printp{, % we will invoke \printp if p is prime
\ifnum\count\n=1 and@\fi % `and' precedes the last value
\number\count\p \advcount\n by -1 }
% \printifprime: runs the primality test on p and prints p only if it passed.
\def\printifprime{\testprimality \ifprime\printp\fi}
% \testprimality: tries odd divisors d = 3, 5, ... while the outcome is still
% unknown. The extra pair of braces makes the inner \loop local, so it does
% not interfere with the outer \loop in \primes; that is why the prime switch
% has to be set with \global.
\def\testprimality{{\setcount\d=3 \global\primetrue
\loop\trialdivision \ifunknown\advcount\d by2 \repeat}}
% \trialdivision: puts the quotient p/d into count 0. The outcome stays
% unknown only while that quotient exceeds d. Multiplying the quotient back
% by d and getting p again means that d divides p, so p is marked as not
% prime and the search stops.
\def\trialdivision{\setcount0=\count\p \divcount0 by\count\d
\ifnum\count0>\count\d \unknowntrue\else\unknownfalse\fi
\multcount0 by\count\d
\ifnum\count0=\count\p \global\primefalse\unknownfalse\fi}
\ddanger The first thirty prime numbers are \primes{30}. You may not
find this fact very startling; but you may be surprised to learn that
the previous sentence was typeset by saying
\begintt
The first thirty prime numbers are \primes{30}.
\endtt
\TeX\ did all of the calculation by expanding the |\primes| macro, so the author
is pretty sure that the list of ↑{prime numbers} given above is quite free
of typographic errors. Here is the set of macros that did it:
$$\halign{\indent#\hfil\cr
|\newswitch{prime} \newswitch{unknown} % boolean variables|\cr
\noalign{↑(:newswitch)↑(:newcount)}
|\newcount\n \newcount\p \newcount\d % integer variables|\cr
|\def\primes#1{2,@3% assume that #1 is at least 2|\cr
| \setcount\n=#1 \advcount\n by-2 % n more to go|\cr
| \setcount\p=5 % odd primes starting with p|\cr
| \loop\ifnum\count\n>0 \printifprime\advcount\p by2 \repeat}|\cr
|\def\printp{, % we will invoke \printp if p is prime|\cr
| \ifnum\count\n=1 and@\fi % `and' precedes the last value|\cr
| \number\count\p \advcount\n by -1 }|\cr
|\def\printifprime{\testprimality \ifprime\printp\fi}|\cr
|\def\testprimality{{\setcount\d=3 \global\primetrue|\cr
| \loop\trialdivision \ifunknown\advcount\d by2 \repeat}}|\cr
\noalign{\penalty-500}
|\def\trialdivision{\setcount0=\count\p \divcount0 by\count\d|\cr
| \ifnum\count0>\count\d \unknowntrue\else\unknownfalse\fi|\cr
| \multcount0 by\count\d|\cr
| \ifnum\count0=\count\p \global\primefalse\unknownfalse\fi}|\cr
}$$
↑(*multcount)↑(*divcount)↑(*advcount)↑(*setcount)
The computation is fairly straightforward, except that it involves
a loop inside a loop; therefore |\testprimality| introduces an extra
set of braces, to keep the inner loop control from interfering with
the outer loop. The braces make it necessary to say `|\global|' when
|\ifprime| is being set true or false. \TeX\ spent more time constructing
that sentence than it usually spends on an entire page; the
|\trialdivision| macro was expanded 132 times. % cpu time was 4 sec
\ddanger The |\loop| macro that does all these wonderful things is
actually quite simple. It puts the code that's supposed to be repeated
into a control sequence called |\body|, and then another control
sequence iterates until the condition is false:
\begintt
\def\loop#1\repeat{\def\body{#1}\iterate}
\def\iterate{\body\let\next=\iterate\else\let\next=\relax\fi\next}
\endtt
The expansion of\/ |\iterate| ends with the expansion of\/ |\next|; therefore
\TeX\ is able to remove |\iterate| from its memory before
invoking |\next|, and the memory does not fill up during a long loop.
↑(iteration) ↑(tail recursion)
\ddanger The |\hex| macro below, which converts |\count\n| to ↑{hexadecimal
notation}, illustrates a {\sl recursive\/} control structure in which
many copies of\/ |\hex| can be active simultaneously. ↑{Recursion} works better
than simple |\loop| iteration in this application because the hexadecimal
digits are discovered from right to left, while they must be output
from left to right.
\begintt
\def\hex{{\setcount0=\count\n \divcount\n by16
\ifnum\count\n>0 \hex\fi \setcount1=\count\n \multcount1 by-16
\setcount\n=\count0 \advcount0 by\count1 \hexdigit}}
\def\hexdigit{\ifnum\count0<10 \number\count0
\else\advcount0 by-10 \advcount0 by`A \char\count0 \fi}
\endtt
\ddanger Our final example is a macro that computes the length of its
argument; for example, `|\length{argument}|' expands to `|8|'. This
illustrates yet another aspect of macro technique.
\begintt
\def\length#1{{\setcount0=0 \getlength#1\end \number\count0}}
\def\getlength#1{\ifx#1\end \let\next=\relax
\else\advcount0 by1 \let\next=\getlength\fi \next}
\endtt
\def\rhead{Definitions (aka Macros)} % my little joke
\endchapter
By this time [37 A.D.] the influence of ↑{Macro} had become supreme.
\author ↑{TACITUS}, {\sl Annals\/} (c.\thinspace120 A.D.) % book VI, ch 45
\bigskip
% Oh, you want a definition.
I hate definitions.
\author BENJAMIN ↑{DISRAELI}, {\sl Vivian Grey\/} (1826) % Book II, Chapter 6
\eject
\beginchapter Chapter 21. Making Boxes
In Chapters 11 and 12 we discussed the principles of boxes and glue, and by now
we have seen many applications of those concepts. You can get by in most cases
with the boxes that \TeX\ manufactures automatically with its paragraph
builder, its page builder, and its math formula processor; but if you want to
do nonstandard things, you have the option of making boxes by yourself.
For example, Chapter@14 points out that you can keep something from being
hyphenated or split between lines if you enclose it in an ↑{*hbox};
Chapter@19 points out that |\hbox| allows you to get ordinary text into
a displayed equation. ↑(*vbox)
\danger The purpose of the present chapter is to nail down whatever details
about boxes haven't been covered yet. Fortunately, there isn't much more
to discuss; we have already mentioned most of the rules, so this chapter is
fairly short. In fact, the previous chapters have dealt with almost
everything except the rules about rules.
\danger To make a ↑{rule box} (i.e., a solid ↑{black rectangle}), you
type `↑{*hrule}' in vertical mode or `↑{*vrule}' in horizontal mode,
followed by any or all of the specifications `↑{.width}\<dimen>',
`↑{.height}\<dimen>', `↑{.depth}\<dimen>', in any order. For example, if
\begintt
\vrule height4pt width3pt depth2pt
\endtt
appears in the middle of a paragraph, \TeX\ will typeset the black box
`\thinspace\vrule height4pt width3pt depth2pt\thinspace'. If you
specify a dimension twice, the second specification overrules the first.
If you leave a dimension unspecified, you get the following by default:
$$\halign{\indent#\hfil&\qquad\hfil#\hfil&\qquad\hfil#\hfil\cr
&|\hrule|&|\vrule|\cr
width&\tt*&$0.4\pt$\cr
height&$0.4\pt$&\tt*\cr
depth&$0.0\pt$&\tt*\cr}$$
Here `{\tt*}' means that the actual dimension depends on the context;
the rule will extend to the boundary of the smallest box that encloses it.
\hrule
\danger For example, the author typed `|\hrule|' just before typing this
paragraph, and you can see what happened: A horizontal rule, $0.4\pt$
thick, was extended across the page, because the vertical box that
encloses it turned out to be just that wide. \ (In fact, the
vertical box that encloses it is the page itself.) \ Another example
appears immediately after this paragraph, where you can see the result
of
\begintt
\hrule width5cm height1pt \vskip1pt \hrule width6cm
\endtt
\TeX\ does not put ↑{interline glue} between rule boxes and their neighbors
in a vertical list, so these two rules are exactly $1\pt$ apart.
\hrule width5cm height1pt \vskip1pt \hrule width6cm
\dangerexercise B. L. ↑{User} didn't want a horizontal rule to touch the
left margin, so he put it in a box and moved it right, like this:
\begintt
\moveright 1in \vbox{\hrule width3in}
\endtt
↑(*moveright)
But he found that this produced more space above and below the rule
than when he had simply said `|\hrule width 4in|' with no |\vbox|.
Why did \TeX\ insert more space, and what should he have done to
avoid it?
\answer The interline skip is added for vboxes, but not for rules; he
forgot to say ↑{:nointerlineskip}, before and after the |\moveright|
construction.
\danger If you specify all three dimensions of a rule, there's no essential
difference between |\hrule| and |\vrule|, since both will produce exactly
the same black box. But you must call it an |\hrule| if you want to put
it in a vertical list, and you must call it a |\vrule| if you want to put
it in a horizontal list, regardless of whether it actually looks like
a horizontal rule or a vertical rule or neither. If you say |\vrule| in
vertical mode, \TeX\ starts a new paragraph; if you say |\hrule| in
horizontal mode, \TeX\ stops the current paragraph and returns to
vertical mode.
\danger The dimensions of a rule can be negative; for example, here's
a rule whose height is $3\pt$ and whose depth is $-2\pt$:
`\thinspace\vrule height3pt depth-2pt width1in\thinspace'.
However, a rule is invisible unless its height plus depth is positive
and its width is positive. A rule whose width is negative cannot be
seen, but it acts like a ↑{backspace} when it appears in a horizontal list.
\dangerexercise Explain how the author probably obtained the rule
`\thinspace\vrule height3pt depth-2pt width1in\thinspace' in the
previous paragraph. [{\sl Hint:\/} It's one inch long.]
\answer |\vrule height3pt depth-2pt width1in|. Notice that it was necessary
to call it a |\vrule| since it appeared in horizontal mode.
\ddanger Now let's summarize all of the ways there are to specify boxes
explicitly in \TeX. \ (1)@A character by itself makes a character box, in
horizontal mode; this character is taken from the current font. \
(2)@The commands |\hrule| and |\vrule| make rule boxes, as just explained.
\ (3)@Otherwise you can make hboxes and vboxes, which fall under the
generic term ↑{<box}. A \<box> has one of the following eight forms:
$$\halign{\indent#\hfil&\quad(see Chapter #)\hfil\cr
|\hbox|\<box specification>|{|\<hlist>|}|&12\cr
|\vbox|\<box specification>|{|\<vlist>|}|&12\cr
|\vtop|\<box specification>|{|\<vlist>|}|&12\cr
|\vcenter|\<box specification>|{|\<vlist>|}|&17\cr
|\box|\<register number>&15\cr
|\copy|\<register number>&15\cr
|\vsplit|\<register number>|to|\<dimen>&15\cr
|\lastbox|&21\cr}$$
↑(*hbox)↑(*vbox)↑(*vtop)↑(*vcenter)
Here a ↑{<box specification} is either `↑{.to}\<dimen>' or
`↑{.expand}\<dimen>' or empty; this governs the setting of glue in
the horizontal or vertical lists inside the box, as explained in
Chapter@12. A \<register number> is between 0 and 255; after you say
↑{*box}, that register becomes void, but after ↑{*copy} the register
is unchanged, as explained in Chapter@15. The ↑{*vsplit} operation
is also explained in Chapter@15.
\ddanger The bottom line of the table above refers to ↑{*lastbox},
a primitive operation that hasn't been mentioned before. If the last item
on the current horizontal list or vertical list is an hbox or vbox, it
is removed from the list and it becomes the |\lastbox|; otherwise
|\lastbox| is void. This operation is allowed in ↑{internal vertical
mode}, horizontal mode, and restricted horizontal mode, but you cannot use
it to take a box from the current page in vertical mode. In math modes,
|\lastbox| is always void. At the beginning of a paragraph,
`|{\setbox0=\lastbox}|' removes the ↑{indentation box}.
\ddanger The operation ↑{*unskip} is something like |\lastbox|, except that
it applies to glue instead of to boxes. If the last thing on the current list
is a glue item (or ↑{leaders}, as explained below), it is removed. You
can't remove glue from the current page by using |\unskip| in vertical
mode, but you can say `|\vskip\minusthe|↑{*lastskip}', which has almost
the same effect.
\ddanger Chapters 24 to 26 present summaries of all \TeX's operations
in all modes, and when those summaries mention a `\<box>' they mean one
of the eight possibilities just listed. For example, you can say
`|\setbox|\<register number>|=|\<box>' in any mode, and you can say
`|\moveright|\<dimen>\<box>' in vertical modes. But you can't say
`|\setbox|\<register number>|=C|' or `|\moveright|\<dimen>|\hrule|';
if you try either of these, \TeX\ will complain that a \<box> was
supposed to be present. Characters and rules are so special, they
aren't regarded as \<box>es.
\ddangerexercise Define a control sequence |\boxit| so that
`|\boxit{|\<box>|}|' yields the given box surrounded by 3@points of space and
by ruled lines on all four sides.
$$\def\boxit#1{\vbox{\hrule\hbox{\vrule\kern3pt
\vbox{\kern3pt#1\kern3pt}\kern3pt\vrule}\hrule}}
\setbox4=\vbox{\hsize 23pc \noindent \strut For example, the sentence you
are now reading was typeset as part of the displayed formula
|$$\boxit{\boxit{\box4}}$$|, where box@4 was created by typing
`|\setbox4=\vbox{\hsize 23pc \noindent \strut For example,
the sentence you are now reading ... \strut}|'.\strut}
\boxit{\boxit{\box4}}$$
\answer |\def\boxit#1{\vbox{\hrule\hbox{\vrule\kern3pt|\par
| \vbox{\kern3pt#1\kern3pt}\kern3pt\vrule}\hrule}}|\par
\smallskip\noindent
(The resulting box does not have the baseline of the original one;
you have to work a little bit harder to get that.)
\danger Let's look also at what can go inside a box. An hbox contains
a horizontal list; a vbox contains a vertical list. Both kinds of lists
are made up primarily of things like boxes, glue, kerns, and penalties,
as we have seen in Chapters 14 and@15. But you can also include some
special things that we haven't discussed yet, namely ``leaders''
and ``whatsits.'' Our goal in the rest of this chapter will be to
study how to make use of such exotic items.
\danger The dots you see before your eyes here
\xleaders\hbox to 6pt{$\hss\cdot\hss$}\hfil\
are called {\sl↑{leaders}\/}\break
because they lead your eyes across the page; such things are often
used in indexes or tables of contents. The general idea is to repeat a
box as many times as necessary to fill up some given space. \TeX\ treats
leaders as a special case of glue; no, wait, it's the other way
around: \TeX\ treats glue as a special case of leaders. Ordinary glue
fills space with nothing, while leaders fill space with any
desired thing. In horizontal mode you can say
$$\displaybox{|\leaders|\<box or rule>|\hskip|\<glue>}$$
↑(*leaders)
and the effect will be the same as if you had said just `|\hskip|\<glue>',
except that the space will be occupied by copies of the specified
\<box or rule>. The glue stretches or shrinks in the usual way. For example,
\begintt
\def\leaderfill{\leaders\hbox to 1em{\hss.\hss}\hfill}
\line{Alpha\leaderfill Omega}
\line{The Beginning\leaderfill The Ending}
\endtt
will produce the following two lines:
$$\vbox{
\def\leaderfill{\leaders\hbox to 1em{\hss.\hss}\hfill}
\line{Alpha\leaderfill Omega}
\line{The Beginning\leaderfill The Ending}
}$$
Here `|\hbox to 1em{\hss.\hss}|' specifies a box one em wide, with a
period in its center; the control sequence |\leaderfill| then causes this
box to be replicated when filling space in the |\line| box. \ (Plain \TeX's
↑{:line} macro makes an hbox whose width is the |\hsize|.)
\danger Notice that the ↑{dots} in the two example lines appear exactly
above each other. This is not a coincidence; it's a consequence of the
fact that the |\leaders| operation acts something like a window that lets
you see part of an infinite row of boxes. If the words `Alpha' and `Omega'
are replaced by longer words, the number of dots might be different
but the ones you see will be in the same places as before. The infinitely
replicated boxes are lined up so that they touch each other, and so that,
if you could see them all, one of them would have the same reference point as
the smallest enclosing box. Thus, |\leaders| will put a box flush with the
left edge of an enclosing box, if you start the leaders there; but you
won't get a box flush right unless the width of the enclosing box is
exactly divisible by the width of the repeated box. If@the repeated box
has width@$w$, and if the space to be filled is at least@$2w$, then you
will always see at least one copy of the box; but if the space is less
than@$2w$ the box may not appear, because boxes in the infinite
row are typeset only when their entire width falls into the available space.
\ddanger When leaders are isolated from each other, you might not
want them to be aligned as just described, so \TeX\ also provides
for ↑{nonaligned leaders}. In this case a box of width@$w$ will be
copied $q$@times when the space to be filled is at least@$qw$ and
less than $(q+1)w$; furthermore, the results will be centered in the
available space. There are two kinds of nonaligned leaders in \TeX,
namely ↑{*cleaders} (centered leaders) and ↑{*xleaders} (expanded
leaders). Centered leaders pack the boxes tightly next to each other
leaving equal amounts of blank space at the left and right; expanded
leaders distribute the extra space equally between the $q+1$ positions
adjacent to the $q$@boxes. For example, let's suppose that a $10\pt$-wide
box is being used in leaders that are supposed to fill $56\pt$ of space.
Five copies of the box will be used; |\cleaders| will produce $3\pt$ of
space, then the five boxes, then another $3\pt$ of space. But
|\xleaders| will produce $1\pt$ space, then the first box, then
another $1\pt$ space, then the second box, \dots, then the fifth box,
and $1\pt$ space.
\ddangerexercise Suppose that a $10\pt$-wide box is to fill $38\pt$ of
space starting $91\pt$ from the left of its enclosing box. How many
copies of the box will be produced by |\leaders|, |\cleaders|, and
|\xleaders|? Where will the boxes be positioned, relative to the
left edge of the enclosing box, in each of the three cases?
\answer |\leaders|: two boxes starting at $100\pt$, $110\pt$.\par
|\cleaders|: three boxes starting at $95\pt$, $105\pt$, $115\pt$.\par
|\xleaders|: three boxes starting at $93\pt$, $105\pt$, $117\pt$.
\ddangerexercise The definition of\/ |\leaderfill| given above uses a box that
is one em wide. That box contains only a `.'\thinspace; if we assume that
the character is $0.2\,$em wide, there is $0.4\,$em of blank space
at both sides of the box. Therefore the |\leaders| construction
will leave between $0.4\,$em and $1.4\,$em of blank space between the
periods and the text at either end. Redefine |\leaderfill| so that
the amount of blank space at either end will be between $0.1\,$em and
$1.1\,$em, but the leaders on adjacent lines will still be aligned
with each other.
\answer |\def\leaderfill{\kern-0.3em\leaders\hbox to 1em{\hss.\hss}%|\par
| \hskip0.6em plus1fill \kern-0.3em }|
\danger Instead of giving a \<box> in the leaders construction, you can
give a \<rule>, which means either |\hrule| or |\vrule|, followed
by optional ↑{.height}, ↑{.width}, and ↑{.depth} specifications as
usual. The rule will then be made as wide as the corresponding@\<glue>.
This is a case where |\hrule| makes sense in horizontal mode, because
it gives a ↑{horizontal rule in text}. For example, if the
|\leaderfill| macro in our earlier illustration is changed to
\begintt
\def\leaderfill{ \leaders\hrule\hfill\ }
\endtt
then the results look like this:
$$\vbox{
\def\leaderfill{ \leaders\hrule\hfill\ }
\line{Alpha\leaderfill Omega}
\line{The Beginning\leaderfill The Ending}
}$$
When a rule is used instead of a box, it fills the space completely, so
there's no difference between |\leaders|, |\cleaders|, and |\xleaders|.
\ddangerexercise What does |\leaders\vrule\hfill| produce?
\answer Since no |height| or |depth| specification follows the |\vrule|,
the height and depth are `|*|'; i.e., the rule extends to the smallest
enclosing box. This usually makes a heavy black band, which is too
horrible to demonstrate here. However, it does work in the ↑{:downbracefill}
macro of Appendix@B\null; and |\leaders\vrule\vfill| works fine in
vertical mode.
\ddanger Leaders work in vertical mode as well as in horizontal mode. In this
case vertical glue (e.g., |\vskip|\<glue> or |\vfill|) is used instead of
horizontal glue, and |\leaders| produces boxes that are aligned so that the
top of each repeated box has the same vertical position as the top
of the smallest enclosing box, plus a multiple of the height-plus-depth
of the repeated box. No interlineskip glue is placed between boxes in
vertical leaders; the boxes are just stacked right on top of each other.
\ddanger If you specify horizontal leaders with a box whose width isn't
positive, or if you specify vertical leaders with a box whose
height-plus-depth isn't positive, \TeX\ silently ignores the leaders
and produces ordinary glue instead.
\ddangerexercise Explain how you can end a paragraph with a rule that
is at least $10\pt$ long and extends all the way to the right
margin, like this:
\null\nobreak\leaders\hrule\hskip10pt plus1filll\ \par
\answer For example, say
\begintt
\null\nobreak\leaders\hrule\hskip10pt plus1filll\ \par
\endtt
The `|\|\]' provides extra glue that is wiped out by the implied |\unskip|
at the end of every paragraph (see Chapter@14), and the `|\null\nobreak|'
makes sure that the leaders do not disappear at a line break; `↑{.filll}'
overtakes the ↑{*parfillskip} glue.
\ddanger Since horizontal leaders are like horizontal glue, they have no
height or depth, when \TeX\ calculates the size of the enclosing box,
even though they replicate a box that does have height or depth.
Similarly, vertical leaders have no width.
\ddangerexercise Demonstrate how to produce the following `\TeX ture'
$$\vbox to .5in{\vfill
\hbox to 2.5in{\cleaders\vbox to .5in{\hphantom{\hbox{\TeX}}
\cleaders\hbox{\TeX}\vfil}\hfil}}$$
by using vertical leaders inside of horizontal leaders. \ (The \TeX\ logo
has been put into a rectangular box, and copies of this box have been packed
together tightly.) ↑(TeX logo)
\answer The tricky part here is that the vertical leaders must be given
some width, or they would produce a box of width zero; such a box is
unsuitable for replication in horizontal leaders. Similarly, the
horizontal leaders produce a box of height zero, so they are enclosed
in a |\vbox| of appropriate height.
\begintt
$$\vbox to .5in{\vfill
\hbox to 2.5in{\cleaders\vbox to .5in{\hphantom{\hbox{\TeX}}
\cleaders\hbox{\TeX}\vfil}\hfil}}$$
\endtt
\ddangerexercise Use vertical leaders to solve exercise 20.\punishexno.
\answer We assume that a strut is $12\pt$ tall, and that 50 lines
fit on a page:
\begintt
\setbox0=\hbox{\strut I must not talk in class.}
\null\cleaders\copy0\vskip600pt\vfill\eject % 50 times on page 1;
\null\cleaders\box0\vskip600pt\bye % 50 more on page 2.
\endtt
The ↑{:null} keeps glue (and leaders) from disappearing at the top of
the page.
\danger The ↑{:overbrace} and ↑{:underbrace} macros of plain \TeX\ are
constructed by combining characters with rules. Font ↑{.cmathx} contains
four symbols $\bracelu\;\braceld\;\bracerd\;\braceru$, each of which has
depth zero and height equal to the thickness of a rule that joins them
properly. Therefore it's easy to define ↑{:upbracefill} and ↑{:downbracefill}
macros so that you can obtain, e.g., ↑(braces)
$$\displayvbox{\kern4pt
\hbox to 100pt{\downbracefill}\hbox to 50pt{\upbracefill}
}$$
by saying `|\hbox to 100pt{\downbracefill}\hbox to 50pt{\upbracefill}|'
in vertical mode. Details of those macro definitions appear in Appendix@B.
\ddanger The definition of\/ ↑{:overarrow} in Appendix B is more complex than
that of\/ |\overbrace|, because it involves a box instead of a rule. The
fonts of plain \TeX\ are designed so that symbols like $\leftarrow$ and
$\to$ can be extended with ↑{minus signs}; similarly, $\Leftarrow$ and
↑(arrows)
$\Rightarrow$ can be extended with ↑{equals signs}. However, you can't
simply put the characters next to each other, because that leaves
gaps (`${\leftarrow}{-}{-}$' and `${\Leftarrow}{=}{=}$'). It is necessary
to backspace a little between characters. An additional complication
arises because the extension line in a long arrow might need to be some
non-integer number of minus signs long. To solve this problem, the
↑{:rightarrowfill} macro in Appendix@B uses |\cleaders| with a repeatable
box consisting of the middle 10 units of a minus sign, where one unit
is ${1\over18}\,$em. The leaders are preceded and followed by $-$ and@$\to$;
there's enough backspacing to compensate for up to 5@units of extra space,
fore and aft, that |\cleaders| might leave blank. In this way a macro is
obtained such that
\begintt
\hbox to 100pt{\rightarrowfill}
\endtt
yields `\hbox to 100pt{\rightarrowfill}'.
\ddanger Now we know all about leaders. What about ↑{whatsits}? Well,
whatsits have been provided as a general mechanism by which important
special printing applications can be handled as extensions to \TeX.
It's possible for system wizards to modify the \TeX\ program, without
changing too much of the code, so that new features can be accommodated
at high speed instead of encoding them in macros. The author hopes that
such extensions will not be made very often, because he doesn't want
incompatible pseudo-\TeX\ systems to proliferate; yet he realizes that
certain special books deserve a special treatment. Whatsits make it
possible to incorporate new things into boxes without bending the existing
conventions too much. But they make applications less portable from
one machine to another.
\ddanger Two kinds of whatsits are defined as part of all \TeX\
implementations. They aren't really ↑{extensions to \TeX}, but
they are coded as if they were, so that they provide a model of how
other extensions could be made. The first of these is connected with
output to text files, and it involves the \TeX\ primitive commands
↑{*openout}, ↑{*closeout}, ↑{*write}, and ↑{*immediate}. The second is
connected with special instructions that can be transmitted to
printing devices, via \TeX's ↑{*special} command.
\ddanger The ability to write text files that can later be input by other
programs (including \TeX) makes it possible to take care of tables of
contents, indexes, and many other things. You can say
`|\openout|\<number>|=|\<file name>' and `|\closeout|\<number>' by analogy
with the |\openin| and |\closein| commands of Chapter@20; the
\<number> must be between 0 and@15. The filename is usually extended with
`↑{..tex}' if it has no extension. There is a |\write| command that writes
one line to a file, analogous to the |\read| command that reads one line;
you say
$$\displaybox{|\write|\<number>|{|\<token list>|}|}$$
and the material goes out to the file that corresponds to the given stream
number. If the \<number> is negative or greater than@15, or if the
specified stream has no file open for output, the output goes to the
user's terminal and into the log file. Plain \TeX\ has a ↑{:newwrite}
command that allocates output stream numbers from 0 to@15. Output streams
are completely independent of input streams.
\ddanger However, the output actually takes place in a delayed fashion;
the |\openout|, |\closeout|, and |\write| commands that you give are not
performed when \TeX\ sees them. Instead, \TeX\ puts these commands into
whatsit items, and places them into the current horizontal
or vertical or math list that is being built. No actual output will occur
until this whatsit is eventually shipped out to the |dvi| file, as part
of a larger box. The reason for this delay is that |\write| is often
used to make an index or table of contents, and the exact page on which
a particular item will appear is generally unknown when the |\write|
instruction occurs in mid-paragraph. \TeX\ is usually working ahead,
reading an entire paragraph before breaking it into lines, and accumulating
more than enough lines to fill a page before deciding what goes on
the page, as explained in Chapters 14 and@15. Therefore a deferred writing
mechanism is the only safe way to insure the validity of page number
references.
\ddanger The \<token list> of a |\write| command is first stored in a
whatsit without performing any macro expansion; the macro expansion takes
place later, when \TeX\ is in the middle of a ↑{*shipout} operation. For
example, suppose that some paragraph in your document contains the text
\begintt
... For \write\inx{example: \the\count0}example, suppose ...
\endtt
Then the horizontal list for the paragraph will have a whatsit just before
the word `|example|', and just after the interword space following@`|For|'.
This whatsit item contains the unexpanded token list
`|example: \the\count0|'. It sits dormant while the paragraph is being
broken into lines and put on the current page. Let's suppose that
this word `|example|' (or some hyphenated initial part of it, like
`|ex-|') is shipped out on page@256. Then \TeX\ will write the line
\begintt
example: 256
\endtt
on output stream |\inx|, because the `|\the\count0|' will be expanded
at that time. Of course, |\write| commands are usually generated by
macros; they are rarely typed explicitly in mid-paragraph.
\ddanger \TeX\ defers |\openout| and |\closeout| commands by putting them
into whatsits too; thus, the relative order of output commands will be
preserved, unless boxes are shipped out in some other order due to
insertions or such things.
\ddanger Sometimes you don't want \TeX\ to defer a |\write| or an
|\openout| or |\closeout|. You could say, e.g., `|\shipout\hbox{\write...}|',
but that would put an unwanted empty page in your |dvi| file. So \TeX\ has
another feature that gets around this problem: If you type `↑{*immediate}'
just before |\write| or |\openout| or |\closeout|, the operation will
be performed immediately, and no whatsit will be made. For example,
\begintt
\immediate\write-1{Goodbye}
\endtt
prints `|Goodbye|' on your terminal. Without the |\immediate|, you wouldn't
see the `|Goodbye|' until the current list was output. \ (In fact, you
might never see it; or you may see it more than once, if the
current list goes into a box that was copied.) \ An `|\immediate\write-1|'
differs from ↑{*message} in that |\write| prints the text on a line by itself;
the results of several |\message| commands might appear on the same line,
separated by spaces.
\ddanger The \<token list> of a |\write| ought to be rather short, since
it makes one line of output. Some implementations of \TeX\ are unable to
write long lines. If you want to write a lot of stuff, just give
several |\write| commands. Each line is written in the form that \TeX\ uses
to display token lists in the ↑{*show} command: characters represent
themselves (except that you get duplicated characters like |##| for macro
parameter characters ↑(sharpsign)), and unexpandable control sequence tokens
produce their names, preceded by `|\|' and followed by a space (unless the
name is an active character or a control sequence formed from a single
nonletter).
\ddanger \TeX\ ignores |\write|, |\openout|, and |\closeout| whatsits that
appear within boxes governed by ↑{leaders}. If you are upset about this,
you shouldn't be.
\ddanger Since the \<token list> of a deferred |\write| is expanded at a
fairly random time (when |\shipout| occurs), you should be careful about
what control sequences it is allowed to contain. The techniques of Chapter@20
for controlling macro expansion often come in handy with respect to |\write|.
\ddangerexercise Suppose that you want to |\write| a token list that
involves a macro |\chapno|, containing the current chapter number, as well as
`|\the\count0|' which refers to the current page. You want |\chapno| to
be expanded immediately, because it might change before the token list
is written; but you want |\the\count0| to be expanded at the time of
|\shipout|. How can you manage this?
\answer |{\let|\stretch|\the=0\edef|\stretch|\next|\stretch
|{\write|\stretch|\cont|\stretch|{|\<token list>|}}\next|\stretch|}| will
expand everything but |\the| when the |\write| command is given.
\ddanger Now let's wrap up our study of boxes by considering one more feature.
The command `↑{*special}|{|\<token list>|}|' can be given in any
mode. Like |\write|, it puts its token list into a whatsit; and like
|\message|, it expands the token list immediately. This token list will be
output to the |dvi| file with the other typesetting commands that
\TeX\ produces. Therefore it is implicitly associated with a particular
position on the page, namely the reference point that would have been present
if a box of height, depth, and width zero had appeared in place of the
whatsit. The \<token list> in a |\special| command should consist of a
keyword followed if necessary by a space and appropriate arguments. For example,
\begintt
\special{halftone pic1}
\endtt
might mean that a ↑{picture} on file |pic1| should be inserted ↑(halftones)
on the current page, with its reference point at the current position.
\TeX\ doesn't look at the token list to see if it makes any sense; the
list is simply copied to the output. However, you should be careful not to
make the list too long, or you might overflow \TeX's string memory.
\ddanger Software programs that convert |dvi| files to printed or
displayed output should be able to fail gracefully when they don't
recognize your special keywords. Thus, |\special| operations should
never do anything that changes the current position. Whenever you use
|\special|, you are taking a chance that your output file will not be
printable on all output devices, because all |\special| functions are
extensions to \TeX. However, the author anticipates that certain standards
for common graphic operations will emerge in the \TeX\ user community,
after careful experiments have been made by different groups of people;
then there will be a chance for some uniformity in the use of
|\special| extensions.
\endchapter
If age or weaknesse do prohibite bloud-letting,
you must use boxing.
\author PHILIP ↑{BARROUGH}, {\sl The Method of Physick\/} (15xx)
% 1.v (1639) 7, acc to OED; I have this on order
\bigskip
The only thing that never looks right is a rule.
There is not in existence a page with a rule on it
that cannot be instantly and obviously improved
by taking the rule out.
% "Even dashes, cherished as they are by authors who cannot punctuate,
% spoil a page. They are generally merely ignorant substitutes for colons."
\author GEORGE BERNARD ↑{SHAW}, in {\sl The Dolphin\/} (1940) % v4 p81
\eject
\beginchapter Chapter 22. Alignment
Printers charge extra when you ask them to typeset ↑{tables}, and they do so
for good reason: Each table tends to have its own peculiarities, so it's
necessary to give some thought to each one, and to fiddle with alternative
approaches until finding something that looks good and communicates well.
However, you needn't be too frightened of doing tables with \TeX, since plain
\TeX\ has a ``tab'' feature that handles simple situations pretty much like
you would do them on a typewriter. Furthermore, \TeX\ has a powerful
alignment mechanism that makes it possible to cope with extremely complex
tabular arrangements. Simple cases of these ↑{alignment} operations will
suffice for the vast majority of applications.
Let's consider ↑{tabbing} first. If you say `↑{:settabs} $n$ |\columns|',
plain \TeX\ makes it easy to produce lines that are divided into $n$@equal-size
columns. Each line is specified by typing
$$\displaybox{|\+|\<text$_1$>|&|\<text$_2$>|&|$\,\cdots\,$|\cr|}$$
where \<text$_1$> will start flush with the left margin,
\<text$_2$> will start at the left of the second column, and so on. Notice
that `↑{:+}' starts the line. The final column is followed by `↑{*cr}',
which old-timers will recognize as an abbreviation for the ``↑{carriage
return}'' operation on typewriters that had carriages. For example,
consider the following specification:
\begintt
\settabs 4 \columns
\+&&Text that starts in the third column\cr
\+&Text that starts in the second column\cr
\+\it Text that starts in the first column, and&&&
the fourth, and&beyond!\cr
\endtt
After `|\settabs|\stretch|4\columns|' each |\+| line is divided into quarters,
so the result@is
\medskip
\settabs 4 \columns
\+&&Text that starts in the third column\cr
\+&Text that starts in the second column\cr
\+\it Text that starts in the first column, and&&&
the fourth, and&beyond!\cr
\def\tick{\kern-0.2pt % that's half the rule width
\vbox to 0pt{\kern-36pt\leaders\hbox{\vrule height1pt\vbox to4pt{}}\vfil}}
\vskip\minusthe\baselineskip
\+\tick&\tick&\tick&\tick&\tick\cr
\medskip
This example merits careful study because it illustrates several things.
(1)@The `|&|' ↑(ampersand) is like the {\sc TAB} key on many typewriters;
it tells \TeX\ to advance to the next tab position, where there's a tab at
the left edge of each column. In this example, \TeX\ has set up four tabs,
indicated by the dashed lines; a dashed line is also shown at the left
margin, although there isn't really a tab there. (2)@But `|&|' isn't
exactly like a mechanical typewriter {\sc TAB}, because it first backs up
to the beginning of the current column before advancing to the next. In
this way you can always tell what column you're tabbing to, by counting
the number of |&|'s; that's handy, because variable-width type otherwise
makes it difficult to know whether you've passed a tab position or not.
Thus, on the last line of our example, three |&|'s were typed in order to
get to column@4, even though the text had already extended into column@2
and perhaps into column@3. (3)@You can say `|\cr|' before you have
specified a complete set of columns, if the remaining columns are blank.
(4)@The |&|'s are different from tabs in another way, too: \TeX\ ignores
↑{spaces} after@`|&|', hence you can conveniently finish a column by typing
`|&|'@at the end of a line in your input file, without worrying that an
extra blank space will be introduced there. \ (The second-last line of the
example ends with@`|&|', and there is an implicit blank space following that
symbol; if \TeX\ hadn't ignored that space, the words `the fourth'
wouldn't have started exactly at the beginning of the fourth column.) \
Incidentally, plain \TeX\ also ignores spaces after `|\+|', so that the
first column is treated like the others. (5)@The `↑{:it}' in the last line
of the example causes only the first column to be italicized, even though
no ↑{braces} were used to confine the range of italics, because \TeX\
implicitly inserts braces around each individual entry of an alignment.
\danger Once you have issued a |\settabs| command, the tabs remain set until you
reset them, even though you go ahead and type ordinary paragraphs as usual.
But if you enclose |\settabs| in |{...}|, the tabs defined inside a group
don't affect the tabs outside; `|\global\settabs|' is not permitted.
\danger Tabbed lines usually are used between paragraphs, in the same
situations as you would type ↑{:line} or ↑{:centerline} to get lines with
a special format. But it's also useful to put |\+|@lines inside a |\vbox|;
this makes it convenient to specify ↑{displays} that contain aligned
material. For example, if you type
\begintt
$$\vbox{\settabs 3 \columns
\+This is&a strange&example\cr
\+of displayed&three-column&format.\cr}$$
\endtt
you get the following display:
$$\vbox{\settabs 3 \columns
\+This is&a strange&example\cr
\+of displayed&three-column&format.\cr}$$
In this case the first column doesn't appear flush left, because \TeX\
centers a box that is being displayed. Columns that end with |\cr| in
a |\+|@line are put into a box with their natural width; so the first
and second columns here are one-third of the |\hsize|, but the third column
is only as wide as the word `example'. We have used |$$| ↑(dollardollar)
in this construction even though no mathematics is involved, because |$$|
does other useful things; for example, it centers the box, and it inserts
space above and below.
People don't always want tabs to be equally spaced, so there's another
way to set them, by typing `|\+|\<sample line>|\cr|' immediately after
`|\settabs|'. In this case tabs are placed at the positions
of the |&|'s in the sample line, and the sample line itself does not appear
in the output. For example,
\begintt
\settabs\+\indent&Horizontal lists\quad&\cr % sample line
\+&Horizontal lists&Chapter 14\cr
\+&Vertical lists&Chapter 15\cr
\+&Math lists&Chapter 17\cr
\endtt
causes \TeX\ to typeset the following three lines of material:
\nobreak\medskip
\settabs\+\indent&Horizontal lists\quad&\cr
\+&Horizontal lists&Chapter 14\cr
\+&Vertical lists&Chapter 15\cr
\+&Math lists&Chapter 17\cr
\medbreak\noindent
The |\settabs| command in this example makes column@1 as wide as a paragraph
↑(indention, see indentation)
indentation; and column@2 is as wide as `Horizontal lists' plus one quad of
space. ↑(:quad) Only two tabs are set in this case, because only two |&|'s
appear in the sample line. \ (A sample line might as well end with@|&|,
because the text following the last tab isn't used for anything.)
The first line of a table can't always be used as a sample line, because it
won't necessarily give the correct tab positions. In a large table you have
to look ahead and figure out the biggest entry in each column; the sample
line is then constructed by typing the widest first column, the widest
second column, etc., omitting the last column. Be sure to include some
extra space between columns in the sample line, so that the columns
won't touch each other.
\exercise Explain how to typeset the following table [from Beck,
Bertholle, and Child, {\sl Mastering the Art of French Cooking\/}
(New York: Knopf, 1961)]: % p283
↑(Beck, Simone) ↑(Bertholle, Louisette) ↑(Child, Julia)
\nobreak\medskip
\settabs\+\indent&10$1\over2$ lbs.\qquad&\it Servings\qquad&\cr
\+&\negthinspace\it Weight&\it Servings&
{\it Approximate Cooking Time\/}*\cr
\smallskip
\+&8 lbs.&6&1 hour and 50 to 55 minutes\cr
\+&9 lbs.&7 to 8&About 2 hours\cr
\+&9$1\over2$ lbs.&8 to 9&2 hours and 10 to 15 minutes\cr
\+&10$1\over2$ lbs.&9 to 10&2 hours and 15 to 20 minutes\cr
\smallskip
\+&* For a stuffed goose,
add 20 to 40 minutes to the times given.\cr
\answer Notice the uses of `|\smallskip|' here to separate the table heading
and footing from the table itself; such refinements are often worthwhile.
\begintt
\settabs\+\indent&10$1\over2$ lbs.\qquad&\it Servings\qquad&\cr
\+&\negthinspace\it Weight&\it Servings&
{\it Approximate Cooking Time\/}*\cr
\smallskip
\+&8 lbs.&6&1 hour and 50 to 55 minutes\cr
\+&9 lbs.&7 to 8&About 2 hours\cr
\+&9$1\over2$ lbs.&8 to 9&2 hours and 10 to 15 minutes\cr
\+&10$1\over2$ lbs.&9 to 10&2 hours and 15 to 20 minutes\cr
\smallskip
\+&* For a stuffed goose,
add 20 to 40 minutes to the times given.\cr
\endtt
The title line specifies `|\it|' three times, because each entry between
tabs is treated as a group by \TeX; you would get error messages galore
if you tried to say something like \hbox{`|\+&{\it Weight&Servings&...}\cr|'}.
The `↑{:negthinspace}' in the title line is a small backspace that
compensates for the slant in an italic {\it W\/}; the author inserted
this somewhat unusual correction after seeing how the table looked
without it, on the first proofs. \ (You weren't supposed to think of this,
but it has to be mentioned.) \ Another way to treat this table would be to
display it in a vbox, instead of including a first column whose sole
purpose is to specify indentation.
\ninepoint % it's all dangerous from here to end of chapter
\danger If you want to put something ↑{flush right} in its column, just type
`↑{*hfill}' before it; and be sure to type `|&|' after it, so that
\TeX\ will be sure to move the information all the way until it touches
the next tab. Similarly, if you want to ↑{center} something in its
column, type `|\hfill|' before it and `|\hfill&|' after it. For example,
\begintt
\settabs 2 \columns
\+\hfill This material is flush right&
\hfill This material is centered\hfill&\cr
\+\hfill in the first half of the line.&
\hfill in the second half of the line.\hfill&\cr
\endtt
produces the following little table:\enddanger
\nobreak\medskip
\settabs 2 \columns
\+\hfill This material is flush right&
\hfill This material is centered\hfill&\cr
\+\hfill in the first half of the line.&
\hfill in the second half of the line.\hfill&\cr
\danger The |\+| macro in Appendix@B works
by putting the \<text> for each column into an hbox as follows:
$$\displaybox{|\hbox to |\<column width>|{|\<text>|\hss}|}$$
The ↑{*hss} means that the text is normally flush left, and that it can
extend to the right of its box. Since |\hfill| is ``more infinite'' than
|\hss| in its ability to stretch, it has the effect of right-justifying or
centering as stated above. Furthermore, |\hfill| doesn't shrink, so \TeX\
will complain of an overfull box if something doesn't fit in its column.
You could also center some text by putting `|\hss|' before it and just
`|&|' after it; in that case the text would be allowed to extend to the
left and right of its column, and the box would never be considered
overfull. The last column of a |\+|@line (i.e., the column entry that is
followed by |\cr|) is treated differently, however; |\hss| is not inserted
into it, and the \<text> is simply put into an hbox with its natural width.
\danger ↑{Computer programs} present difficulties of a different kind, since
some people like to adopt a style in which the tab positions change from
line to line. For example, consider the following program fragment:
$$\vbox{\+\bf if $n<r$ \cleartabs&\bf then $n:=n+1$\cr
\+&\bf else &{\bf begin} ${\it print\_totals}$; $n:=0$;\cr
\+&&{\bf end};\cr
\+\bf while $p>0$ do\cr
\+\quad\cleartabs&{\bf begin} $q:={\it link}(p)$;
${\it free\_node}(p)$; $p:=q$;\cr
\+&{\bf end};\cr}$$
Special tabs have been set up so that `{\bf then}' and `{\bf else}' appear
one above the other, and so do `{\bf begin}' and `{\bf end}'. It's possible
to achieve this by setting up a new sample line whenever a new tab position
is needed; but that's a tedious job, so plain \TeX\ makes it a little simpler.
Whenever you type |&| to the right of all existing tabs, the effect is to
set a new tab there, in such a way that the column just completed will have
its natural width. Furthermore, there's an operation `↑{:cleartabs}' that
resets all tab positions to the right of the current column. Therefore the
computer program above can be \TeX ified as follows:
$$\halign{\indent#\hfil\cr
|$$\vbox{\+\bf if $n<r$ \cleartabs&\bf then $n:=n+1$\cr|\cr
| \+&\bf else &{\bf begin} ${\it print\_totals}$; $n:=0$;\cr|\cr
| \+&&{\bf end};\cr|\cr
| |\<The remaining part is left as an exercise>|}$$|\cr}$$
\dangerexercise Complete the example computer program by specifying three more
|\+|@lines.
\answer In such programs it seems best to type |\cleartabs| just before |&|,
whenever it is desirable to reset the old tabs. Multiletter identifiers look
best when set in text italics with ↑{:it}, as explained in Chapter@18.
Thus, the following is recommended:
\begintt
\+\bf while $p>0$ do\cr
\+\quad\cleartabs&{\bf begin} $q:={\it link}(p)$;
${\it free\_node}(p)$; $p:=q$;\cr
\+&{\bf end};\cr
\endtt
\danger Although |\+| lines can be used in vertical boxes, you must never
use |\+| inside of another |\+| line. The |\+| macro is intended for
simple applications only.
\ddanger The |\+| and |\settabs| macros of Appendix B keep track of tabs by
maintaining register |\box|↑{:tabs} as a box full of empty boxes whose
widths are the column widths in reverse order. Thus you can examine the
tabs that are currently set, by saying `↑{*showbox}|\tabs|'; this puts
the column widths into your log file, from right to left. For example,
after `|\settabs\+\hskip100pt&\hskip200pt&\cr\showbox\tabs|', \TeX\
will show the lines
\begintt
\hbox(0.0+0.0)x300.0
.\hbox(0.0+0.0)x200.0
.\hbox(0.0+0.0)x100.0
\endtt
\ddangerexercise Study the |\+| macro in Appendix B and figure out how to
change it so that tabs work as they do on a mechanical typewriter
(i.e., so that `|&|' always moves to the next tab that lies strictly
to the right of the current position). Assume that the user
doesn't backspace past previous tab positions; for example, if the input is
\hbox{`|\+&&\hskip-2em&x\cr|'}, do not bother to put `x' in the first or
second column; just put it at the beginning of the third column. \
(This exercise is a bit difficult.)
\answer Here we retain the idea that |&| inserts a new tab, when there
are no tabs to the right of the current position. Only one of the macros
that are used to process |\+|@lines needs to be changed; but
(unfortunately) it's the most complex one:
\begintt
\def\tabbox(b){\ifcr\rb % now \box\() holds the column
\else\hss\rb \setdimen\()=0pt
\setdimen\(i)=1wd\()\advdimen\(i) by1sp
\loop\ifdim \dimen\()<\dimen\(i)
\global\setbox\tabsyet=\hbox{\unhbox\tabsyet
\global\setbox\(i)=\lastbox}%
\ifvoid\(i) \advdimen\(i) by-\dimen\()
\advdimen\(i) by-1sp \global\setbox\(i)
=\hbox to\dimen\(i){}\setdimen\(i)=-1pt\fi
\advdimen\() by1wd\(i) \global\setbox\tabsdone
=\hbox{\box\(i)\unhbox\tabsdone}\repeat
\setbox\()=\hbox to\dimen\(){\unhbox\()}\fi
\box\()}
\endtt
\danger \TeX\ has another important way to make tables, using an operation
called ↑{*halign} (``horizontal alignment''). In this case the table format
is based on the notion of a {\sl↑{template}}, not on tabbing; the idea
is to specify a separate environment for the text in each column.
Individual entries are inserted into their templates, and presto, the
table is complete.
\danger For example, let's go back to the Horizontal/Vertical/Math list
example that appeared earlier in this chapter; we can specify it with
|\halign| instead of with tabs. The new specification is
\begintt
\halign{\indent#\hfil&\quad#\hfil\cr
Horizontal lists&Chapter 14\cr
Vertical lists&Chapter 15\cr
Math lists&Chapter 17\cr}
\endtt
and it produces exactly the same result as the old one. This example
deserves careful study, because |\halign| is really quite simple once
you get the hang of it. The first line contains the {\sl ↑{preamble}\/} to
the alignment, which is something like the sample line used to set tabs
for@|\+|. In this case the preamble contains two templates, namely
`|\indent#\hfil|' for the first column and `|\quad#\hfil|' for the
second. Each template contains exactly one appearance of `|#|', ↑(sharp)
and it means ``stick the text of each column entry in this place.''
Thus, the first column of the line that follows the preamble becomes
\begintt
\indent Horizontal lists\hfil
\endtt
when `|Horizontal lists|' is stuffed into its template; and the second
column, similarly, becomes `|\quad Chapter 14\hfil|'. The question is,
why |\hfil|? Ah, now we get to the interesting point of the whole thing:
\TeX\ reads an entire |\halign{...}| specification into its memory
before typesetting anything, and it keeps track of the maximum width
of each column, assuming that each column is set without stretching or
shrinking the glue. Then it goes back and puts every entry into a box,
setting the glue so that each box has the maximum column width. That's
where the |\hfil| comes in; it stretches to fill up the extra space in
narrower entries.
\dangerexercise What table would have resulted if the template for the
first column in this example had been `|\indent\hfil#|' instead of
`|\indent#\hfil|'?
\answer \par\nobreak\vskip\minusthe\baselineskip
\halign{\indent\hfil#&\quad#\hfil\cr
Horizontal lists&Chapter 14\cr\noalign{\nobreak}
Vertical lists&Chapter 15\cr\noalign{\nobreak}
Math lists&Chapter 17\qquad (i.e., the first column would be
right-justified)\cr}
\danger Before reading further, please make sure that you understand the
idea of templates in the example just presented. There are several
important differences between |\halign| and@|\+|: (1)@|\halign| calculates
↑(halign compared to tabbing)
the maximum column widths automatically; you don't have to guess what the
longest entries will be, as you do when you set tabs with a sample line.
(2)@Each |\halign| does its own calculation of column widths; you have to
do something special if you want two different |\halign| operations to
produce identical alignments. By contrast, the |\+| operation remembers tab
positions until they are specifically reset; any number of paragraphs and
even |\halign| operations can intervene between |\+|'s, without affecting
the tabs. (3)@Because |\halign| reads an entire table in order to
determine the maximum column widths, it is unsuitable for huge tables
that fill several pages of a book. By contrast, the@|\+|@operation deals
with one line at a time, so it places no special demands on \TeX's memory.
\ (However, if you have a huge table, you should probably define your own
special-purpose macro for each line instead of relying on the general
|\+|@operation.) (4)@|\halign| takes less computer time than |\+|@does,
because |\halign| is a built-in command of \TeX, while |\+|@is a macro
that has been coded in terms of\/ |\halign| and various other primitive
operations. (5)@Templates are much more versatile than tabs, and they can
save you a lot of typing. For example, the Horizontal/Vertical/Math list
table could be specified more briefly by noticing that there's common
information in the columns:
\begintt
\halign{\indent# lists\hfil&\quad Chapter #\cr
Horizontal&14\cr Vertical&15\cr Math&17\cr}
\endtt
You could even save two more keystrokes by noting that the chapter numbers
all start with `|1|'\thinspace! \ (Caution: It takes more time to think of
optimizations like this than to type things in a straightforward way;
do it only if you're bored and need something amusing to keep up
your interest.)\ (6)@On the other hand, templates are no substitute for
tabs when the tab positions are continually varying, as in the
computer program example.
\danger Let's do a more interesting table, to get more
experience with |\halign|. Here is another example based on the
↑{Beck}/↑{Bertholle}/↑{Child} book cited earlier:
$$\vbox{\openup{2pt}
\halign{\hfil\bf#&\quad\hfil\it#\hfil&\quad\hfil#\hfil&
\quad\hfil#\hfil&\quad#\hfil\cr
\sl American&\sl French&\sl Age&\sl Weight&\sl Cooking\cr
\noalign{\vskip-2pt}
\sl Chicken&\sl Connection&\sl(months)&\sl(lbs.)&\sl Methods\cr
\noalign{\smallskip}
Squab&Poussin&2&$3\over4$ to 1&Broil, Grill, Roast\cr
Broiler&Poulet Nouveau&2 to 3&1$1\over2$ to 2$1\over2$&Broil, Grill, Roast\cr
Fryer&Poulet Reine&3 to 5&2 to 3&Fry, Saut\'e, Roast\cr
Roaster&Poularde&5$1\over2$ to 9&Over 3&Roast, Poach, Fricassee\cr
Fowl&Poule de l'Ann\'ee&10 to 12&Over 3&Stew, Fricassee\cr
Rooster&Coq&Over 12&Over 3&Soup stock, Forcemeat\cr}}$$
Note that, except for the title lines, the first column is set right-justified
in boldface type; the middle columns are centered; the second column
is centered and in italics; the final column is left-justified. We would
like to be able to type the rows of the table as simply as possible; hence,
for example, it would be nice to be able to specify the bottom row by
typing only
\begintt
Rooster&Coq&Over 12&Over 3&Soup stock, Forcemeat\cr
\endtt
without worrying about type styles, centering, and so on. This not only
cuts down on keystrokes, it also reduces the chances for making typographical
errors. Therefore the template for the first column should be
`|\hfil\bf#|'; for the second column it should be `|\hfil\it#\hfil|' to
get the text centered and italicized; and so on. We also need to allow
for space between the columns, say one quad. {\it Voil\`a! La typographie
est sur la table:\/}\looseness=-1
$$\halign{\indent#\hfil\cr
|\halign{\hfil\bf#&\quad\hfil\it#\hfil&\quad\hfil#\hfil&|\cr
| \quad\hfil#\hfil&\quad#\hfil\cr|\cr
\ \<the title lines>\cr
| Squab&Poussin&2&$3\over4$ to 1&Broil, Grill, Roast\cr|\cr
| ... Forcemeat\cr}|\cr}$$
As with the |\+| operation, spaces are ignored after |&|, in the preamble
as well as in the individual rows of the table. Thus, it is convenient
to end a long row with `|&|' when it takes up more than one line
in your input file.
\dangerexercise How was the `{\bf Fowl}' line typed? \ (This is too easy.)
\answer |Fowl&Poule de l'Ann\'ee&10 to 12&Over 3&Stew, Fricassee\cr|
\danger The only remaining problem in this example is to specify the title
lines, which have a different format from the others. In this case the style
is different only because the typeface is slanted, so there's no special
difficulty; we just type
\begintt
\sl American&\sl French&\sl Age&\sl Weight&\sl Cooking\cr
\sl Chicken&\sl Connection&\sl(months)&\sl(lbs.)&\sl Methods\cr
\endtt
It is necessary to say `|\sl|' each time, because each individual entry
of a table is implicitly enclosed in braces.
\danger A discriminating reader will notice that there's also a bit of
extra space between the title line and the other lines of the ↑{poultry}
table. The author inserted this extra space by typing
`↑{*noalign}|{\smallskip}|' just after the title line. In general, you can say
$$\displaybox{|\noalign{|\<vertical mode material>|}|}$$
just after any |\cr| in an |\halign|; \TeX\ will simply copy the vertical
mode material, without subjecting it to alignment, and it will appear
in place when the |\halign| is finished. You can use |\noalign| to
insert extra space, as here, or to insert penalties that affect page
breaking, or even to insert lines of text (see Chapter@19). Definitions
inside the braces of\/ |\noalign{...}| are local to that group.
\danger The |\halign| command also makes it possible for you to adjust
the spacing between columns so that a table will fill a specified area.
You don't have to decide that the ↑{inter-column space} is a quad; you can
let \TeX\ make the decisions, based on how wide the columns come out,
because \TeX\ puts ``↑{tabskip glue}'' between columns. This tabskip glue
is usually zero, but you can set it to any value you like by saying
`↑{*tabskip}|=|\<glue>'. For example,
let's do the poultry table again, but with the beginning of the
specification changed as follows:
\begintt
\tabskip=1em plus2em minus.5em
\halign to\the\hsize{\hfil\bf#&\hfil\it#\hfil&\hfil#\hfil&
\hfil#\hfil&#\hfil\cr
\endtt
The main body of the table is unchanged, but the |\quad| spaces have been
removed from the preamble, and a nonzero |\tabskip| has been specified
instead. Furthermore `|\halign|' has been changed to `|\halign
to\the\hsize|'; this means that each line of the table will be put into a
box whose width is the current value of\/ ↑{*hsize}, i.e., the horizontal
line width usually used in paragraphs. The resulting table looks like this:
$$\vbox{\openup{2pt}
\tabskip=1em plus2em minus.5em
\halign to\the\hsize{\hfil\bf#&\hfil\it#\hfil&\hfil#\hfil&
\hfil#\hfil&#\hfil\cr
\sl American&\sl French&\sl Age&\sl Weight&\sl Cooking\cr
\noalign{\vskip-2pt}
\sl Chicken&\sl Connection&\sl(months)&\sl(lbs.)&\sl Methods\cr
\noalign{\smallskip}
Squab&Poussin&2&$3\over4$ to 1&Broil, Grill, Roast\cr
Broiler&Poulet Nouveau&2 to 3&1$1\over2$ to 2$1\over2$&Broil, Grill, Roast\cr
Fryer&Poulet Reine&3 to 5&2 to 3&Fry, Saut\'e, Roast\cr
Roaster&Poularde&5$1\over2$ to 9&Over 3&Roast, Poach, Fricassee\cr
Fowl&Poule de l'Ann\'ee&10 to 12&Over 3&Stew, Fricassee\cr
Rooster&Coq&Over 12&Over 3&Soup stock, Forcemeat\cr}}$$
\danger In general, \TeX\ puts tabskip glue before the first column, after
the last column, and between the columns of an alignment. You can specify
the final aligned size by saying `|\halign to|\<dimen>' or
`|\halign expand|\<dimen>', ↑(.to) ↑(.expand)
just as you can say `|\hbox to|\<dimen>' and `|\hbox expand|\<dimen>'.
This specification governs the setting of the tabskip glue; but it does
not affect the setting of the glue within column entries. \ (Those
entries have already been packaged into boxes having the maximum
natural width for their columns, as described earlier.)
\ddanger Therefore `|\halign| |to| |\the\hsize|' will do nothing if the
tabskip glue has no stretchability or shrinkability, except that it will
cause \TeX\ to report an ↑{underfull} or ↑{overfull} box. An overfull box
occurs if the tabskip glue doesn't have enough shrinkability to meet the
given specification; in this case you get an error message on the terminal
and in your log file, but there is no ``↑{overfull rule}'' to mark the
oversize table on the printed output.
\danger The poultry example just given used the same tabskip glue
everywhere, but you can vary it by resetting ↑{:tabskip} within the
preamble. The tabskip glue that is in force when \TeX\ reads the
`|{|' following |\halign| will be used before the first column;
the tabskip glue that is in force when \TeX\ reads the `|&|' after
the first template will be used between the first and second
columns; and so on. The tabskip glue that is in force when \TeX\
reads the |\cr| after the last template will be used after the
last column. For example, in
\begintt
\tabskip=3pt
\halign{\hfil#\tabskip=4pt& #\hfil&
\hbox to 10em{\hss\tabskip=5pt # \hss}\cr ...}
\endtt
the preamble specifies aligned lines that will consist of the following
seven parts:
$$\halign{\indent#\hfil\cr
tabskip glue $3\pt$;\cr
first column, with template `|\hfil#|';\cr
tabskip glue $4\pt$;\cr
second column, with template `|#\hfil|';\cr
tabskip glue $4\pt$;\cr
third column, with template `|\hbox to 10em{\hss# \hss}|';\cr
tabskip glue $5\pt$.\cr}$$
\ddanger \TeX\ copies the templates without interpreting them except to
remove any |\tabskip| glue specifications. More precisely, the tokens of the
preamble are passed directly to the templates without macro expansion;
\TeX\ looks only for `|\cr|' commands, `|&|', `|#|', `|\span|', and
`|\tabskip|'. The \<glue> following `|\tabskip|' is scanned in@the usual
way (with macro expansion), and the corresponding tokens are not included@in
the current template. Notice that, in the example above, the space
after `|5pt|' also disappeared. The fact that |\tabskip=5pt| occurred
inside an extra level of braces did not make the definition local, since
\TeX\ didn't ``see'' those braces; similarly, if\/ |\tabskip| had been
preceded by `|\global|', \TeX\ wouldn't have made a global definition, it
would just have put `|\global|' into the template. All assignments to
|\tabskip| within the preamble are local to the |\halign| (unless
↑{*globaldefs} is positive), so the value of\/ |\tabskip| was $3\pt$ again
when this particular |\halign| was completed.
\ddanger When `↑{*span}' appears in a preamble, it causes the next token
to be expanded before \TeX\ reads on.
\def\\{{\it c\/}}
\dangerexercise Design a preamble for the following table:
$$\tabskip0pt
\halign to\the\hsize{\sl#\hfil\tabskip.5em plus.5em&
#\hfil\tabskip0pt plus.5em&
\hfil#\tabskip1em plus2em&
\sl#\hfil\tabskip.5em plus.5em&
#\hfil\tabskip0pt plus.5em&
\hfil#\tabskip0pt\cr
England&P. Philips&1560--1628&
Netherlands&J. P. Sweelinck&1562--1621\cr
&J. Bull&\\1563--1628&
&P. Cornet&\\1570--1633\cr
Germany&H. L. Hassler&1562--1612&
Italy&G. Frescobaldi&1583--1643\cr
&M. Praetorius&1571--1621&
Spain&F. Correa de Arauxo&\\1576--1654\cr
France&J. Titelouze&1563--1633&
Portugal&M. R. Coelho&\\1555--\\1635\cr}$$
The tabskip glue should be zero at the left and right of each line; it should be
$1\,$em plus $2\,$em in the center; and it should be ${1\over2}\,$em
plus ${1\over2}\,$em before the names, $0\,$em plus ${1\over2}\,$em before
the dates. Assume that the lines of the table will be specified by, e.g.,
\begintt
France&J. Titelouze&1563--1633&
Portugal&M. R. Coelho&\\1555--\\1635\cr
\endtt
where `|\\|' has been predefined by `|\def\\{{\it c\/}}|'.
↑(organists)
↑(Cornet, Peeter) ↑(Philips, Peter) ↑(Sweelinck, Jan Pieterszoon)
↑(Bull, John) ↑(Titelouze, Jehan) ↑(Hassler, Hans Leo)
↑(Praetorius [Schultheiss], Michael) ↑(Frescobaldi, Girolamo)
↑(Coelho, Manuel Rodrigues) % so listed in Lisbon & Rio, contrary to Groves!
↑(Correa de Arauxo, Francisco)
% The idea for this table came from The Organ and its Music, by Peeters and
% Vente (Antwerp, 1971); but their data was so flaky, I'm not citing them...
\answer |$$\tabskip0pt|\par
|\halign to\the\hsize{\sl#\hfil\tabskip.5em plus.5em&|\par
| #\hfil\tabskip0pt plus.5em&|\par
| \hfil#\tabskip1em plus2em&|\par
| \sl#\hfil\tabskip.5em plus.5em&|\par
| #\hfil\tabskip0pt plus.5em&|\par
| \hfil#\tabskip0pt\cr ...}|\par
\medskip
\ddangerexercise Design a preamble so that the table ↑(Welsh conjugation)
$$\def\welshverb#1={{\bf#1} = }
\tabskip 0pt
\halign to\the\hsize{\welshverb#\hfil\tabskip1em plus1em&
\welshverb#\hfil&\welshverb#\hfil\tabskip0pt\cr
rydw i=I am&ydw i=am I&roeddwn i=I was\cr
rwyt ti=thou art&wyt ti=art thou&roeddet ti=thou wast\cr
mae e=he is&ydy e=is he&roedd e=he was\cr
mae hi=she is&ydy hi=is she&roedd hi=she was\cr
rydyn ni=we are&ydyn ni=are we&roedden ni=we were\cr
rydych chi=you are&ydych chi=are you&roeddech chi=you were\cr
maen nhw=they are&ydyn nhw=are they&roedden nhw=they were\cr}$$
can be specified by typing lines like
\begintt
mae hi=she is&ydy hi=is she&roedd hi=she was\cr
\endtt
\answer The trick is to define a new macro for the preamble:
\begintt
$$\def\welshverb#1={{\bf#1} = }
\tabskip 0pt
\halign to\the\hsize{\welshverb#\hfil\tabskip1em plus1em&
\welshverb#\hfil&\welshverb#\hfil\tabskip0pt\cr
\endtt
\setbox0=\vbox{\lineskip0pt
\tabskip0pt plus1fil\halign to\the\hsize{\tabskip0pt\strut
\hfil#: &\vtop{\parindent=0pt\hsize=16em\hangindent.5em\strut#\strut}\cr
\omit\hfil\sevenrm B.C.&\cr
397&War between Syracuse and Carthage\cr
396&Aristippus of Cyrene and An\-tis\-the\-nes of Athens (philosophers)\cr
395&Athens rebuilds the Long Walls\cr
394&Battles of Coronea and Cnidus\cr
\\393&Plato's
{\sl Apology\/};
Xenophon's
{\sl Memorabilia\/};
Aristophanes'
{\sl Ecclesiazus\ae\/}\cr
391--87&Dionysius subjugates south Italy\cr
391&Isocrates opens his school\cr
390&Evagoras Hellenizes Cyprus\cr
387&``King's Peace''; Plato visits Ar\-chy\-tas of Taras (mathematician)
and Dionysius I\cr
386&Plato founds the Academy\cr
383&Spartans occupy Cadmeia at Thebes\cr
380&Isocrates' {\sl Panegyricus\/}\cr}}
\medskip
\ddangerexercise \hsize=13pc
The line breaks in the second column of the table at the right were chosen
by \TeX\ so that the second column was exactly 16@ems wide. Furthermore,
the author specified one of the rows of the table by typing
$$\halign{#\hfil\cr
|\\393&Plato's {\sl Apology\/};|\cr
| Xenophon's|\cr
| {\sl Memorabilia\/};|\cr
| Aristophanes'|\cr
| {\sl Ecclesiazus\ae\/}\cr|\cr}$$
Can you guess what preamble was used in the alignment? \ [The data comes
from Will ↑{Durant}'s {\sl The Life of Greece\/} (Simon \& Schuster, 1939).]
↑(Aristippus of Cyrene) ↑(Antisthenes of Athens)
↑(Plato) ↑(Xenophon) ↑(Aristophanes) ↑(Dionysius I of Syracuse)
↑(Isocrates) ↑(Evagoras of Salamis) ↑(Archytas of Taras)
\strut\vadjust{\vbox to 0pt{\vss\box0\kern0pt}}% insert the aligned table
\answer |\hfil#: &\vtop{\parindent=0pt\hsize=16em|\par
| \hangindent.5em\strut#\strut}\cr|\par\nobreak\medskip\noindent
With such narrow measure and such long words, the ↑{*tolerance} should probably
also have been increased to, say, 1000 inside the ↑{*vtop}; luckily it turned
out that a higher tolerance wasn't needed.\par
{\sl Note:\/} The stated preamble solves the problem and demonstrates that
\TeX's line-breaking can be used within tables. But this particular table
is not really a good example of the use of\/ |\halign|, because \TeX\ could
typeset it directly, using ↑{*everypar} in an appropriate manner to set up the
hanging indentation, and using |\par| instead of\/ |\cr|. For example, one
could say
\begintt
\hsize20em \parindent0pt \clubpenalty10000 \widowpenalty10000
\def\history#1&{\hangindent4.5em
\hbox to4em{\hss#1: }\ignorespaces}
\everypar{\history}
\def\\{\strut{\it c\/}} % the \strut forces horizontal mode
\endtt
which spares \TeX\ all the work of\/ |\halign| but yields essentially the
same result. ↑(:strut)
\danger Sometimes a template will apply perfectly to all but one or two of
the entries in a column. For example, in the exercise just given, the
colons in the first column of the alignment were supplied by the
template `|\hfil#:|\]'; but the very first entry in that column,
`{\sevenrm B.C.}', did not have a colon. \TeX\ allows you to escape from
the stated template in the following way: If the very first token
of an alignment entry is `↑{*omit}' (after macro expansion), then
the template of the preamble is omitted; the trivial template `|#|'
is used instead. For example, `{\sevenrm B.C.}' was put into the table above
by typing `|\omit\hfil\sevenrm B.C.|' immediately after the preamble.
You can use |\omit| in any column, but it must come first; otherwise \TeX\
will insert the template that was defined in the preamble.
\ddanger If you think about what \TeX\ has to do when it's processing
|\halign|, you'll realize that the timing of certain actions is critical.
Macros are not expanded when the preamble is being read, except as
described earlier; but once the |\cr| at the end of the preamble has been
sensed, \TeX\ must look ahead to see if the next token is |\noalign| or
|\omit|, and macros are expanded until the next non-space token is found.
If the token doesn't turn out to be |\noalign| or |\omit|, it is put
back to be read again, and \TeX\ begins to read the template (still
expanding macros). The template has two parts, called the $u$ and@$v$ parts,
where $u$@precedes the@`|#|' and $v$@follows@it. When \TeX\ has finished
the $u$@part, its reading mechanism goes back to the token that was
neither |\noalign| nor |\omit|, and continues to read the entry until
getting to the |&| or@|\cr| that ends the entry; then the $v$@part of
the template is read. A special internal operation called ↑{*endtemplate}
is always placed at the end of the $v$@part; this causes \TeX\ to put
the entry into an ``↑{unset box}'' whose glue will be set later when
the final column width is known. Then \TeX\ is ready for another entry;
it looks ahead for |\omit| (and also for |\noalign|, after@|\cr|) and
the process continues in the same way.
\ddanger One consequence of the process just described is that it may be
dangerous to begin an entry of an alignment with |\if...|, ↑(conditionals)
or with any macro that will expand into a replacement text whose first token
is |\if...|; the reason is that the condition will be evaluated before the
template has been read. \ (\TeX\ is still looking to see whether an |\omit|
will occur, when the |\if| is being expanded.) \ For example, if\/ ↑{:strut}
has been defined to be an abbreviation for
$$\displaybox{|\ifmmode|\<text for math modes>%
|\else|\<text for nonmath modes>|\fi|}$$
↑(*ifmmode)
and if\/ |\strut| appears as the first token in some alignment entry,
then \TeX\ will expand it into the \<text for nonmath modes> even though
the template might be `|$#$|', because \TeX\ will be in internal
vertical mode when it is looking for a possible |\omit|. Chaos will
probably ensue. Therefore the replacement text for |\strut| in
Appendix@B is actually
\begintt
\relax\ifmmode...
\endtt
and `|\relax|' has also been put into all other macros that might suffer
from such timing problems. Sometimes you do want \TeX\ to expand a
conditional before a template is inserted, but careful macro designers
watch out for cases where this could cause trouble.
\newdimen\digitwidth \setbox0=\hbox{\sixrm0} \setdimen\digitwidth=1wd0
\danger When you're typesetting ↑{numerical tables}, it's common practice
to line up the ↑{decimal points} in a column. For example, if two numbers
like `0.2010' and `297.1' both appear in the same column, you're supposed
to produce `$\catcode`?=13 \def?{\kern\dimen\digitwidth}
??0.2010 \atop 297.1???$'. This result isn't especially pleasing to the
eye, but that's what people do, so you might have to conform to the practice.
One way to handle this is to treat the column as two columns, somewhat as
|\eqalign| treats one formula as two formulas; the `.' can be placed at
the beginning of the second half-column. But the author usually prefers to
use another, less sophisticated method, which takes advantage of the fact
that the digits 0,@1, \dots,@9 have the same width in most fonts: You can
choose a character that's not used elsewhere in the table, say `|?|',
and change it to an active character that produces a blank space exactly
equal to the width of a digit. Then it's usually no chore to put such nulls
into the table entries so that each column can be regarded as either
centered or right-justified or left-justified. For example, `|??0.2010|'
and `|297.1???|' have the same width, so their decimal points will line
up easily. Here is one way to set up `|?|' for this purpose:
\begintt
\newdimen\digitwidth
\setbox0=\hbox{\sixrm0}
\setdimen\digitwidth=1wd0
\catcode`?=13
\def?{\kern\dimen\digitwidth}
\endtt
These definitions should be local to some group, e.g., inside a |\vbox|,
so that `|?|' will resume its normal behavior when the table is finished.
\danger Let's look now at some applications to mathematics. Suppose first
that you want to typeset the small table
$$\vbox{\halign{$\hfil#$ =&&\ \hfil#\hfil\cr
n\phantom)&0&1&2&3&4&5&6&7&8&9&10&11&12&13&14&15&16&17&18&19&20&\dots\cr
{\cal G}(n)&1&2&4&3&6&7&8&16&18&25&32&11&64&31&128&10&256&5&512&28&
1024&\dots\cr}}$$ % The Grundy function for SYM [cf. Winning Ways p441]
as a ↑{display}ed equation. A brute force approach using |\eqalign| or
|\atop| is cumbersome because ${\cal G}(n)$ and $n$ don't always have the
same number of digits. It would be much nicer to type
$$\halign{\indent#\hfil\cr
|$$\vbox{\halign{|\<preamble>|\cr|\cr
| n\phantom)&0&1&2&3& ... &20&\dots\cr|\cr
| {\cal G}(n)&1&2&4&3& ... &1024&\dots\cr}}$$|\cr}$$
for some \<preamble>. On the other hand, the \<preamble> is sure to be
long, since this table has 23 columns; so it looks as though |\settabs|
and |\+| will be easier. \TeX\ has a handy feature that helps a lot
in cases like this: Preambles often have a periodic structure,
↑(periodic preambles) ↑(cyclic preambles)
and if you put an extra `|&|' ↑(ampersand) just before one of the templates,
\TeX\ will consider the preamble to be an infinite sequence that begins
again at the marked template when the |\cr| is reached. For example,
$$\displaybox{$t_1\,$|&|$\,t_2\,$|&|$\,t_3\,$|&&|$\,t_4\,$|&|%
$\,t_5\,$|\cr|}$$
is treated like
$$\displaybox{$t_1\,$|&|$\,t_2\,$|&|$\,t_3\,$|&|$\,t_4\,$|&|$\,t_5\,$%
|&|$\,t_4\,$|&|$\,t_5\,$|&|$\,t_4\,$|&|$\,t_5\,$%
|&|$\,t_4\,$|&|$\,t_5\,$|&|$\,\,\cdots\,$.}$$
The tabskip glue following each template is copied with that template.
The preamble will grow as long as needed, based on the number of columns
actually used by the subsequent alignment entries. Therefore all it takes is
\begintt
$\hfil#$ =&&\ \hfil#\hfil\cr
\endtt
to make a suitable \<preamble> for the ${\cal G}(n)$ problem.
\ddanger Now suppose that the task is to typeset three pairs of displayed
formulas, with all of the =@signs lined up: % cf. ACP Section 3.3.4
$$\vcenter{\openup{3pt}\halign{
$\hfil#$&&${}#\hfil$&\qquad$\hfil#$\cr
V_i&=v_i-q_iv_j,&X_i&=x_i-q_ix_j,&
U_i&=u_i,\qquad\hbox{for $i\ne j$};\cr
V_j&=v_j,&X_j&=x_j,&
U_j&=u_j+\sum_{i\ne j}q_iu_i.\cr}}\eqno(23)$$
It's not easy to do this with three ↑{:eqalign}'s, because the $\sum$ with
a subscript@$j$ makes the right-hand pair of formulas bigger than the
others; the baselines won't agree unless ``↑{phantoms}'' are put into the
other two |\eqalign|'s (see Chapter@19). Instead of using |\eqalign|,
which is defined in Appendix@B to be a macro that uses |\halign|, let's
try to use |\halign| directly. The natural way to approach this display is
to type
$$\halign{\indent#\hfil\cr
|$$\vcenter{\openup{3pt}\halign{|\<preamble>|\cr|\cr
| |\<first line>|\cr |\<second line>|\cr}}\eqno(23)$$|\cr}$$
because the |\vcenter| puts the lines into a box that is properly centered
with respect to the equation number `(23)'; the ↑{:openup} macro puts a
bit of extra space between the lines, as mentioned in Chapter@19.
\ddanger OK, now let's figure out how to type the \<first line> and \<second
line>. The usual convention is to put `|&|' before the symbols that we want
to line up, so the obvious solution is to type
\begintt
V_i&=v_i-q_iv_j,&X_i&=x_i-q_ix_j,&
U_i&=u_i,\qquad\hbox{for $i\ne j$};\cr
V_j&=v_j,&X_j&=x_j,&
U_j&=u_j+\sum_{i\ne j}q_iu_i.\cr
\endtt
Thus the alignment has six columns. We could take common elements into
the preamble (e.g., `|V_|' and `|=v_|'), but that would be too error-prone
and too tricky.
\ddanger The remaining problem is to construct a preamble to
support those lines. To the left of the =@signs we want the column to
be filled at the left; to the right of the =@signs we want it to be filled
at the right. There's a slight complication because we are breaking a math
formula into two separate pieces, yet we want the result to have the same
spacing as if it were one formula. Since we're putting the `|&|' just before
a relation, the solution is to insert `|{}|' at the beginning of the
right-hand formula; \TeX\ will put the proper space before the equals sign
in `|${}=...$|', but it puts no space before the equals sign in `|$=...$|'.
Therefore the desired \<preamble> is
\begintt
$\hfil#$&${}#\hfil$&
\qquad$\hfil#$&${}#\hfil$&
\qquad$\hfil#$&${}#\hfil$
\endtt
The third and fourth columns are like the first and second, except for
the |\qquad| that separates the equations; the fifth and sixth columns
are like the third and fourth. Once again we can use the handy `|&&|'
shortcut ↑(ampersand ampersand) to reduce the preamble to
\begintt
$\hfil#$&&${}#\hfil$&\qquad$\hfil#$
\endtt
With a little practice you'll find that it becomes easy to compose
preambles as you are typing a manuscript that needs them. However, most
manuscripts don't need them, so it may be a@while before you acquire even
a little practice in this regard.
\ddangerexercise Explain how to produce the following display:
$$\openup{3pt}\tabskip 0pt plus1fil
\halign to\the\displaywidth{\tabskip0pt
$\hfil#$&$\hfil{}#{}$&
$\hfil#$&$\hfil{}#{}$&
$\hfil#$&$\hfil{}#{}$&
$\hfil#$&${}#\hfil$\tabskip 0pt plus1fil&
\llap{#}\tabskip0pt\cr
10w&+&3x&+&3y&+&18z&=1,&(9)\cr
6w&-&17x&&&-&5z&=2.&(10)\cr}$$ % cf. ACP Eqs. 4.5.2-17,18
\answer The equation is divided into separate parts for terms and
plus/minus signs, and tabskip glue is used to center it:
\begintt
$$\openup{3pt}\tabskip 0pt plus1fil
\halign to\the\displaywidth{\tabskip0pt
$\hfil#$&$\hfil{}#{}$&
$\hfil#$&$\hfil{}#{}$&
$\hfil#$&$\hfil{}#{}$&
$\hfil#$&${}#\hfil$\tabskip 0pt plus1fil&
\llap{#}\tabskip0pt\cr
10w&+&3x&+&3y&+&18z&=1,&(9)\cr
6w&-&17x&&&-&5z&=2.&(10)\cr}$$
\endtt
\ddanger The next level of complexity occurs when some entries of a table
span two or more columns. \TeX\ provides two ways to handle this. First
↑(spanned columns in tables)
there's ↑{:hidewidth}, which plain \TeX\ defines to be equivalent to
\begintt
\hskip-1000pt plus 1fill
\endtt
In other words, |\hidewidth| has an extremely negative ``natural width,''
but it will stretch without limit. If you put |\hidewidth| at the right of
some entry in an alignment, the effect is to ignore the width of this
entry and to let it stick out to the right of its box. \ (Think about it;
this entry won't be the widest one, when |\halign| figures the column
width.) \ Similarly, if you put |\hidewidth| at the left of an entry, it will
stick out to the left; and you can put |\hidewidth| at both left and right,
as we'll see later.
\ddanger The second way to handle table entries that span columns is to use
the ↑{*span} primitive, which can be used instead of `|&|' in any
line of the table. \ (We've already seen that |\span| means ``expand'' in
preambles; but outside of preambles its use is
completely different.) \ When `|\span|' appears in place of `|&|',
the material before and after the |\span| is processed in the ordinary
way, but afterward it is placed into a single box instead of two boxes.
The width of this combination box is the sum of the individual column
widths plus the width of the tabskip glue between them; therefore the
spanning box will line up with non-spanning boxes in other rows.
\ddanger For example, suppose that there are three columns, with the
respective templates $u_1\,$|#|$\,v_1\,$|&| $u_2\,$|#|$\,v_2\,$|&|
$u_3\,$|#|$\,v_3$; suppose that the column widths are $w_1$, $w_2$,@$w_3$;
suppose that $g_0$,@$g_1$, $g_2$,@$g_3$ are the tabskip glue widths after
the glue has been set; and suppose that the line
$$\displaybox{$a_1$|\span|$\,\,a_2$|\span|$\,\,a_3$|\cr|}$$
has appeared in the alignment. Then the material for
`$u_1a_1v_1u_2a_2v_2u_3a_3v_3$' (i.e., the result `$u_1a_1v_1$' of
column@1 followed by the results of columns 2 and@3) will be placed into
an hbox of width $w_1+g_1+ w_2+g_2+w_3$. That hbox will be preceded by
glue of width@$g_0$ and it will be followed by glue of width@$g_3$, in the
larger hbox that contains the entire aligned line.
\ddanger You can use ↑{*omit} in conjunction with |\span|. For example,
if we continue with the notation of the previous paragraph, the line
$$\displaybox{|\omit|$\,a_1\,$|\span|$\,a_2\,$|\span\omit|$\,a_3\,$|\cr|}$$
would put the material for `$a_1u_2a_2v_2a_3$' into the hbox just considered.
\ddanger It's fairly common to span several columns and to omit all their
templates, so plain \TeX\ provides a ↑{:multispan} macro that spans
a given number of columns. For example, `|\multispan3|' expands into
`|\omit\span\omit\span\omit|'. If the number of spanned columns is
greater than@9, you must put it in braces, e.g., `|\multispan{13}|'.
\ddanger The preceding paragraphs are rather abstract, so let's look at
an example that shows what |\span| actually does. Suppose you type
\begintt
$$\tabskip=3em
\vbox{\halign{&\hrulefill#\hrulefill\cr
first&second&third\cr
first-and-second\span\omit&\cr
&second-and-third\span\omit\cr
first-second-third\span\omit\span\omit\cr}}$$
\endtt
The preamble specifies arbitrarily many templates equal to
`|\hrulefill#\hrulefill|'; the ↑{:hrulefill} macro is like |\hfill|
except that the blank space is filled with a horizontal rule. Therefore
you can see it in the resulting alignment, which shows the spanned columns:
$$\tabskip=3em
\vbox{\halign{&\hrulefill#\hrulefill\cr
first&second&third\cr
first-and-second\span\omit&\cr
&second-and-third\span\omit\cr
first-second-third\span\omit\span\omit\cr}}$$
The rules stop where the tabskip glue separates columns. You don't see
rules in the first line, since the entries in that line were the widest
in their columns. However, if the tabskip glue had been $1\,$em instead
of $3\,$em, the table would have looked like this:
$$\tabskip=1em
\vbox{\halign{&\hrulefill#\hrulefill\cr
first&second&third\cr
first-and-second\span\omit&\cr
&second-and-third\span\omit\cr
first-second-third\span\omit\span\omit\cr}}$$
\ddangerexercise Consider the following table, which is called
↑{Walter's worksheet}: ↑(IRS) % from instructions to form 1040 (1982), p13
$$\displayvbox{
\halign{
\hfil# \hfil&\quad#&\ \hfil#&\ \hfil#\cr
1&Adjusted gross income\dotfill\span\omit\span&\$4,000\cr
2&Zero bracket amount for&\cr
&a single individual\dotfill\span\omit&\$2,300\cr
3&Earned income\dotfill\span\omit&\underbox{ 1,500}\cr
4&Subtract line 3 from line 2\dotfill\span\omit\span&\underbox{ 800}\cr
5&Add lines 1 and 4. Enter here\span\omit\span\cr
&and on Form 1040, line 35\dotfill\span\omit\span&\$4,800\cr}
}$$
Define a preamble so that the following specification will produce
Walter's worksheet.
$$\halign{\indent#\hfil\cr
|\halign{|\<preamble>\cr
| 1&Adjusted gross income\dotfill\span\omit\span&\$4,000\cr|\cr
| 2&Zero bracket amount for&\cr|\cr
| &a single individual\dotfill\span\omit&\$2,300\cr|\cr
| 3&Earned income\dotfill\span\omit&\underbox{ 1,500}\cr|\cr
| 4&Subtract line 3 from line 2\dotfill|\cr
| \span\omit\span&\underbox{ 800}\cr|\cr
| 5&Add lines 1 and 4. Enter here\span\omit\span\cr|\cr
| &and on Form 1040, line 35\dotfill\span\omit\span&\$4,800\cr}|\cr
}$$
(The macro ↑{:dotfill} is like |\hrulefill| but it fills with dots;
the macro ↑{:underbox} puts its argument into an hbox and underlines it.)
\answer |\hfil# \hfil&\quad#&\ \hfil#&\ \hfil#\cr|
\ddanger Notice the ``early'' appearance of\/ ↑{*cr} in line@2 of the
previous exercise. You needn't have the same number of columns in every
line of an alignment; `|\cr|' means that there are no more columns
in the current line.
\medskip
\ddangerexercise Explain how to typeset the ↑{generic matrix}
$\smash{\pmatrix{a_{11}&a_{12}&\ldots&a_{1n}\cr
a_{21}&a_{22}&\ldots&a_{2n}\cr
\multispan4\dotfill\cr
a_{m1}&a_{m2}&\ldots&a_{mn}\cr}.}$
\answer |\pmatrix{a_{11}&a_{12}&\ldots&a_{1n}\cr|\par
| a_{21}&a_{22}&\ldots&a_{2n}\cr|\par
| \multispan4\dotfill\cr|\par
| a_{m1}&a_{m2}&\ldots&a_{mn}\cr}|
\bigskip\medskip
\ddanger The presence of spanned columns adds a complication to \TeX's
rules for calculating column widths; instead of simply choosing the
maximum natural width of the column entries, it's also necessary to
make sure that the sum of certain widths is big enough to accommodate
spanned entries. So here is what \TeX\ actually does: First, if
any pair of adjacent columns is always spanned as a unit (i.e., if
there's a |\span| between them whenever either one is used), these
two columns are effectively merged into one and the tabskip glue between
them is set to zero. This reduces the problem to the case that
every tab position actually occurs as a boundary. Let there be $n$ such
columns, and for $1\le i\le j\le n$ let $w_{ij}$ be the maximum natural
width of all entries that span columns $i$ through@$j$, inclusive;
if there are no such spanned entries, let $w_{ij}=-\infty$. \ (The merging
of dependent columns guarantees that, for each@$j$, there exists $i\le j$
such that $w_{ij}>-\infty$.) \ Let $t_k$ be the natural width of the tabskip
glue between columns $k$ and@$k+1$, for $1\le k<n$. Now the final width
$w_j$ of column@$j$ is determined by the formula
$$\display w_j=\max_{1\le i\le j}\textstyle\bigl(w_{ij}
-\sum_{i\le k<j}(t_k+w_k)\bigr)$$
for $j=1$, 2, \dots, $n$ (in this order). It follows that
$w_{ij}\le w_i+t_i+\cdots+t_{j-1}+w_j$, for all $i\le j$,
as desired. After the widths@$w_j$ are determined, the tabskip amounts
may have to stretch or shrink; if they shrink, $w_{ij}$ might turn out to
be more than the final width of a box that spans columns $i$ through@$j$,
so the glue in that box might shrink.
\ddanger These formulas usually work fine, but sometimes they produce
undesirable effects. For example, suppose that $n=3$, $w_{11}=w_{22}=w_{33}
=10$, $w_{12}=w_{23}=-\infty$, and $w_{13}=100$; in other words, the columns
by themselves are quite narrow, but there's a big wide entry that's
supposed to span all three columns. In this case \TeX's formula makes
$w_1=w_2=10$ but $w_3=80-t_1-t_2$, so all the excess width is allocated
to the third column. If that's not what you want, the remedy is to use
↑{:hidewidth}, or to increase the natural width of the tabskip glue
between columns.
\ddanger The next level of complexity that occurs in tables is the
appearance of horizontal and vertical ruled lines. People who know
how to make ↑{ruled tables} are generally known as \TeX\ Masters.
↑(TeX Masters) Are you ready?
\ddanger If you approach vertical rules in the wrong manner, they can be
difficult; but there {\sl is\/} a decent way to get them into tables
without shedding too many tears. The first step is to say
`↑{:offinterlineskip}', which means that there will be no blank space
between lines; \TeX\ cannot be allowed to insert ↑{interline glue} in its
normal clever way, because each line is supposed to contain a ↑{*vrule}
that abuts another ↑{*vrule} in the neighboring lines above and/or below.
We will put a strut into every line, by including one in the preamble;
then each line will have the proper height and depth, and there will be no
need for interline glue. Every column entry in an alignment is put into an
hbox whose height and depth are set equal to the height and depth of the
entire line; therefore |\vrule| commands will extend to the top and bottom
of the lines even when their height and/or depth are not specified.
\ddanger A ``column'' should be allocated to every vertical rule, and such
a column can be assigned the template `|\vrule#|'. Then you obtain a
vertical rule by simply leaving the column entries blank, in the normal
lines of the alignment; or you can say `|\omit|' if you want to omit the
rule in some line; or you can say `|height 10pt|' if you want a
nonstandard height; and so on.
\ddanger Here is a small table that illustrates the points just made.
\ [The data appeared in an article by A. H. ↑{Westing}, {\sl BioScience\/
\bf31} (1981), 523--524.]
\def\BC{\hbox to2em{ \sc B.C.\hss}}%
\def\AD{\hbox to2em{ \sc A.D.\hss}}%
$$\hbox to\the\hsize{\vbox{\halign{\indent#\hfil\cr
|\vbox{\offinterlineskip|\cr
|\hrule|\cr
|\halign{&\vrule#&|\cr
| \strut\quad\hfil#\quad\cr|\cr
|height2pt&\omit&&\omit&\cr|\cr
|&Year\hfil&&World Population&\cr|\cr
|height2pt&\omit&&\omit&\cr|\cr
|\noalign{\hrule}|\cr
|height2pt&\omit&&\omit&\cr|\cr
|&8000\BC&&5,000,000&\cr|\cr
|&50\AD&&200,000,000&\cr|\cr
|&1650\AD&&500,000,000&\cr|\cr
|&1850\AD&&1,000,000,000&\cr|\cr
|&1945\AD&&2,300,000,000&\cr|\cr
|&1980\AD&&4,400,000,000&\cr|\cr
|height2pt&\omit&&\omit&\cr}|\cr
|\hrule}|\cr
}}\hfill\vbox{\offinterlineskip
\halign{&\vrule#&
\strut\quad\hfil#\quad\cr
\multispan5\hrulefill\cr
height2pt&\omit&&\omit&\cr
&Year\hfil&&World Population&\cr
height2pt&\omit&&\omit&\cr
\multispan5\hrulefill\cr
height2pt&\omit&&\omit&\cr
&8000\BC&&5,000,000&\cr
&50\AD&&200,000,000&\cr
&1650\AD&&500,000,000&\cr
&1850\AD&&1,000,000,000&\cr
&1945\AD&&2,300,000,000&\cr
&1980\AD&&4,400,000,000&\cr
height2pt&\omit&&\omit&\cr
\multispan5\hrulefill\cr}}}$$
In this example the first, third, and fifth columns are reserved for vertical
rules. Horizontal rules are obtained by saying `↑{*hrule}' outside the
|\halign| or `↑{*noalign}|{\hrule}|' inside it, because the |\halign| appears
in a vbox whose width is the full table width. The horizontal
rules could also have been specified by saying `↑{:multispan}|5\hrulefill|'
inside the |\halign|, since that would produce a rule that spans all
five columns.
\ddanger The only other nonobvious thing about this table is the
inclusion of several lines that say
`|height2pt&\omit&&\omit&\cr|'; can you see what they do? The |\omit|
instructions mean that there's no numerical information, and they
also suppress the ↑{*strut} from the line; the `|height2pt|' makes the
first |\vrule| $2\pt$ high, and the other two rules will follow suit.
Thus, the effect is to extend the vertical rules by two points, where
they touch the horizontal rules. This is a little touch that improves
the appearance of boxed tables; look for it as a mark of quality.
\ddangerexercise Explain why the lines of this table say `|&\cr|' instead of
just `|\cr|'.
\answer `|\cr|' would have omitted the final column, which is a vertical rule.
\ddanger Another way to get vertical rules into tables is to typeset without
them, then back up (using negative glue) and insert them.
\ddanger Here is another table; this one has become a classic, ever since
Michael ↑{Lesk} published it as one of the first examples in his report
on a program to format tables [Bell Laboratories Computing Science
Technical Report {\bf 49} (1976)]. It illustrates several typical
problems that arise in connection with boxed information. In order to
demonstrate \TeX's ability to adapt a table to different circumstances,
tabskip glue is used here to adjust the column widths; the table appears
twice, once generated by `|\halign|@|to125pt|' and once by
`|\halign|@|to200pt|', with nothing else changed.
$$\hbox to\the\hsize{%
\vbox{\tabskip=0pt \offinterlineskip
\def\tablerule{\noalign{\hrule}}
\halign to125pt{\strut#&\vrule#\tabskip=1em plus2em&
\hfil#&\vrule#&\hfil#\hfil&\vrule#&
\hfil#&\vrule#\tabskip=0pt\cr\tablerule
&&\multispan5\hfil AT\&T Common Stock\hfil&\cr\tablerule
&&\omit\hidewidth Year\hidewidth&&
\omit\hidewidth Price\hidewidth&&
\omit\hidewidth Dividend\hidewidth&\cr\tablerule
&&1971&&41--54&&\$2.60&\cr\tablerule
&& 2&&41--54&&2.70&\cr\tablerule
&& 3&&46--55&&2.87&\cr\tablerule
&& 4&&40--53&&3.24&\cr\tablerule
&& 5&&45--52&&3.40&\cr\tablerule
&& 6&&51--59&&.95\rlap*&\cr\tablerule
\noalign{\smallskip}
&\multispan7* (first quarter only)\hfil\cr
}}\hfil
\vbox{\tabskip=0pt \offinterlineskip
\def\tablerule{\noalign{\hrule}}
\halign to200pt{\strut#&\vrule#\tabskip=1em plus2em&
\hfil#&\vrule#&\hfil#\hfil&\vrule#&
\hfil#&\vrule#\tabskip=0pt\cr\tablerule
&&\multispan5\hfil AT\&T Common Stock\hfil&\cr\tablerule
&&\omit\hidewidth Year\hidewidth&&
\omit\hidewidth Price\hidewidth&&
\omit\hidewidth Dividend\hidewidth&\cr\tablerule
&&1971&&41--54&&\$2.60&\cr\tablerule
&& 2&&41--54&&2.70&\cr\tablerule
&& 3&&46--55&&2.87&\cr\tablerule
&& 4&&40--53&&3.24&\cr\tablerule
&& 5&&45--52&&3.40&\cr\tablerule
&& 6&&51--59&&.95\rlap*&\cr\tablerule
\noalign{\smallskip}
&\multispan7* (first quarter only)\hfil\cr}}}$$
The following specification did the job:
$$\halign{\indent#\hfil\cr
|\vbox{\tabskip=0pt \offinterlineskip|\cr
|\def\tablerule{\noalign{\hrule}}|\cr
|\halign to|\<dimen>|{\strut#& \vrule#\tabskip=1em plus2em&|\cr
| \hfil#& \vrule#& \hfil#\hfil& \vrule#&|\cr
| \hfil#& \vrule#\tabskip=0pt\cr\tablerule|\cr
|&&\multispan5\hfil AT\&T Common Stock\hfil&\cr\tablerule|\cr
|&&\omit\hidewidth Year\hidewidth&&|\cr
| \omit\hidewidth Price\hidewidth&&|\cr
| \omit\hidewidth Dividend\hidewidth&\cr\tablerule|\cr
|&&1971&&41--54&&\$2.60&\cr\tablerule|\cr
|&& 2&&41--54&&2.70&\cr\tablerule|\cr
|&& 3&&46--55&&2.87&\cr\tablerule|\cr
|&& 4&&40--53&&3.24&\cr\tablerule|\cr
|&& 5&&45--52&&3.40&\cr\tablerule|\cr
|&& 6&&51--59&&.95\rlap*&\cr\tablerule \noalign{\smallskip}|\cr
|&\multispan7* (first quarter only)\hfil\cr}}|\cr}$$
Points of interest are: (1)@The first column contains a strut; otherwise
it would have been necessary to put a strut on the lines that say
`AT\&T' and `(first quarter only)', since those lines omit the templates
of all other columns that might have a built-in strut. (2)@`↑{:hidewidth}'
is used in the title line so that the width of columns will be affected
only by the width of the numeric data. (3)@`↑{:rlap}' is used so that
the asterisk doesn't affect the alignment of the numbers. (4)@`|\&|' is
used to typeset an ampersand. (5)@If the tabskip specification had
been `|0em plus3em|' instead of `|1em plus2em|', the alignment wouldn't
have come out right, because that would make `AT\&T Common Stock' wider
than the natural width of everything it spans; the excess width would
all have been given to the `Dividend' column.
\ddangerexercise Explain how to add $2\pt$ more space above and below
`AT\&T Common Stock'.
\answer One way is to include two lines just before and after the title
line, saying `|\omit&height2pt&\multispan5&\cr|'. Another way is to
put |\bigstrut| into some column of the title line, for some appropriate
invisible box |\bigstrut| of width zero.
\ddangerexercise Typeset the following chart, making it exactly 36em wide:
↑(family tree)
↑(Bohning [Knuth], Louise Marie)
↑(Ehlert [Bohning], Pauline Anna Marie)
↑(B\"ohning, Martin John Henry)
↑(Wischmeyer [Ehlert], Clara Louise)
↑(Ehlert, Ernst Fred)
↑(Blase [B\"ohning], Maria Dorothea)
↑(B\"ohning, Jobst Heinrich)
$$\vbox{\tabskip0pt \offinterlineskip
\halign to 36em{\tabskip0pt plus1em#&
#\hfil\hfil\hfil\tabskip0pt\cr
&&&&&\strut J. H. B\"ohning, 1838&\cr
&&&&\multispan3\hrulefill\cr
&&&\strut M. J. H. B\"ohning, 1882&\vrule\cr
&&\multispan3\hrulefill\cr
&&\vrule&&\vrule&\strut M. D. Blase, 1840&\cr
&&\vrule&&\multispan3\hrulefill\cr
&\strut L. M. Bohning, 1912&\vrule\cr
\multispan3\hrulefill\cr
&&\vrule&&&\strut E. F. Ehlert, 1845&\cr
&&\vrule&&\multispan3\hrulefill\cr
&&\vrule&\strut P. A. M. Ehlert, 1884&\vrule\cr
&&\multispan3\hrulefill\cr
&&&&\vrule&\strut C. L. Wischmeyer, 1850&\cr
&&&&\multispan3\hrulefill\cr
}}$$
\answer The trick is to have ``empty'' columns at the extreme left and right;
then the |\hrulefill|'s are able to span the tabskip glue.
\begintt
$$\vbox{\tabskip0pt \offinterlineskip
\halign to 36em{\tabskip0pt plus1em#&
#\hfil\hfil\hfil\tabskip0pt\cr
&&&&&\strut J. H. B\"ohning, 1838&\cr
&&&&\multispan3\hrulefill\cr
&&&\strut M. J. H. B\"ohning, 1882&\vrule\cr
&&\multispan3\hrulefill\cr
&&\vrule&&\vrule&\strut M. D. Blase, 1840&\cr
&&\vrule&&\multispan3\hrulefill\cr
&\strut L. M. Bohning, 1912&\vrule\cr
\multispan3\hrulefill\cr
&&\vrule&&&\strut E. F. Ehlert, 1845&\cr
&&\vrule&&\multispan3\hrulefill\cr
&&\vrule&\strut P. A. M. Ehlert, 1884&\vrule\cr
&&\multispan3\hrulefill\cr
&&&&\vrule&\strut C. L. Wischmeyer, 1850&\cr
&&&&\multispan3\hrulefill\cr}}$$
\endtt
\ddanger If you're having trouble ↑{debugging} an alignment, it sometimes
helps to put `↑{:ddt}' at the beginning and end of the templates in your
preamble. This is an undefined control sequence that causes \TeX\ to stop,
displaying the rest of the template. When \TeX\ stops, you can use
|\showlists| and other commands to see what the machine thinks it's doing.
If \TeX\ doesn't stop, you know that it never reached that part of the
template.
\ddanger It's possible to have alignments within alignments. Therefore when
\TeX\ sees a `|&|' or `|\span|' or `|\cr|', it needs some way to decide which
alignment is involved. The rule is that an entry ends when `|&|' or `|\span|'
or `|\cr|' occurs at the same level of braces that was current when the
entry began; i.e., there must be an equal number of left and right ↑{braces}
in every entry. For example, in the line
\begintt
\matrix{1&1\cr 0&1\cr}&\matrix{0&1\cr 0&0\cr}\cr
\endtt
\TeX\ will not resume the template for the first column when it is
scanning the argument to |\matrix|, because the |&|'s and |\cr|'s in
that argument are enclosed in braces. Similarly, |&|'s and |\cr|'s in
the preamble do not denote the end of a template unless the resulting
template would have an equal number of left and right braces.
\ddanger You have to be careful with the use of |&| and ↑{*span} and ↑{*cr},
↑(ampersand)
because these tokens are intercepted by \TeX's scanner even when it is
not expanding macros. For example, if you say `|\let\x=\span|' in the
midst of an alignment entry, \TeX\ will think that the `|\span|' ends
the entry, so |\x| will become equal to the first token following the
`|#|' in the template. You can hide this |\span| by putting it in
braces; e.g., `|{\global\let\x=\span}|'.
\ddanger Sometimes people forget the |\cr| on the last line of an
alignment. This can cause mysterious effects, because \TeX\ is not
clairvoyant. For example, consider the following apparently simple case:
\begintt
\halign{\centerline{#}\cr
A centered line.\cr
And another?}
\endtt
(Notice the missing |\cr|.) \ A curious thing happens here when \TeX\
processes the erroneous line, so please pay attention. The template
begins with `|\centerline{|', so \TeX\ starts to scan the argument to
|\centerline|. Since there's no `|\cr|' after the question mark, the `|}|'
after the question mark is treated as the end of the argument to
|\centerline|, {\sl not\/} as the end of the |\halign|. \TeX\ isn't going
to be able to finish the alignment unless the subsequent text has
the form `|...{...\cr|'. Indeed, an entry like `|a}b{c|' is legitimate
with respect to the template `|\centerline{#}|', since it yields
`|\centerline{a}b{c}|'; \TeX\ is correct when it gives no error message in
this case. But the computer's idea of the current situation is
different from the user's, so a puzzling error message will probably occur
a few lines later.
\ddanger To help avoid such situations, there's a primitive command
↑{*crcr} that acts exactly like |\cr| except that it does nothing
when it immediately follows a |\cr| or a |\noalign{...}|. Thus, when
you write a macro like |\matrix|, you can safely insert |\crcr|
at the end of the user's argument; this will cover up an error if the
user forgot the final |\cr|, and it will cause no harm if the final
|\cr| was present.
\ddanger Are you tired of typing |\cr|? ↑(cr, avoiding)
You can get plain \TeX\ to insert an automatic |\cr| at the end of
each input line in the following way:
$$\halign{\indent#\hfil\cr
|\begingroup \let\par=\cr \obeylines %|\cr
|\halign{|\<preamble>\cr
\ \ \ \<first line of alignment>\cr
\ \ \ \ \dots\cr
\ \ \ \<last line of alignment>\cr
| }\endgroup|\cr}$$
This works because ↑{:obeylines} makes the ascii ↑{<return} into
an active character that uses the current meaning of\/ ↑{*par}, and
\TeX\ always puts \<return> at the end of an input line (see Chapter@8).
If you don't want a@|\cr| at the end of a certain line,
just type `|%|' and the corresponding |\cr|
will be ``commented out.'' ↑(percent) \ (This special mode doesn't
work with ↑{:+} lines, since |\+| is a macro whose argument is delimited
by the token `|\cr|', not simply by a token that has the same meaning as@|\cr|.
↑(delimited arguments) But you can redefine |\+| to overcome this hurdle,
if you want to. For example, define a macro |\alternateplus| that
is just like |\+| except that its argument is delimited by the
active character |↑↑M|; then include the command `|\let\+=\alternateplus|'
as part of\/ |\obeylines|.)
\danger The control sequence ↑{*valign} is analogous to |\halign|, but
rows and columns change r\↑oles. In this case |\cr| marks the bottom of
a column, and the aligned columns are vboxes that are put together in
horizontal mode. The individual entries of each column are vboxed with
depth zero (i.e., as if\/ ↑{*boxmaxdepth} were zero, as explained in
Chapter@12); the entry heights for each row of a |\valign| are maximized
in the same fashion as the entry widths for each column of an@|\halign|
are maximized. The ↑{*noalign} operation can now be used to insert
horizontal mode material between columns; the ↑{*span} operation now
spans rows. ↑(spanned rows in tables) People usually work with \TeX\
at least a year before they find their first application for |\valign|;
and then it's usually a one-row `|\valign{\vfil#\vfil\cr...}|'.
But the general mechanism is there if you need it.
\endchapter
If sixteen pennies are arranged in the form of a square
there will be the same number of pennies in every row, column,
and each of the two long diagonals.
Can you do the same with twenty pennies?
\author HENRY ERNEST ↑{DUDENEY}, {\sl Puzzles and Curious Problems\/} (1931)
% (this is puzzle number 293)
% (posthumous publication; original was in some newspaper)
\immediate\write\ans{}
\immediate\write\ans{\string\ansno\chapno.$\infty$:}
\copytoblankline (Solution to Dudeney's problem.) \
Let |\one| and |\two| be macros that produce a vertical list
denoting one and two pennies, respectively. The problem can be
solved with ↑{*valign} as follows:
\begintt
\valign{\vfil#&\vfil#&\vfil#&\vfil#\cr
\two&\one&\one&\one\cr
\one&\one&\two&\one\cr
\one&\one&\one&\two\cr
\one&\two&\one&\one\cr}
\endtt
Since |\valign| transposes rows and columns, the result is:
$$\def\pennytop{\hbox to 24pt{\manual\char'130\hfil}}
\def\pennyedge{\hbox{\manual\char'133}}
\def\one{\pennytop\pennyedge}
\def\two{\one\pennyedge}
\baselineskip0pt\lineskip0pt\tabskip14pt
\displaybox{\valign{\vfil#&\vfil#&\vfil#&\vfil#\cr
\two&\one&\one&\one\cr
\one&\one&\two&\one\cr
\one&\one&\one&\two\cr
\one&\two&\one&\one\cr}}$$
\bigskip
It was she who controlled the whole of the Fifth Column.
\author AGATHA ↑{CHRISTIE}, {\sl N or M?\/} (1941) % chapter 5, part 1
\eject
\beginchapter Chapter 23. Output Routines
[Not yet written. Will open with discussion of the plain \TeX\ output
routine (how to get headings and footings, etc.); then will discuss ↑{*mark}
and ↑{*topmark} and such things; then will have example of multicolumn
format.]
\endchapter
I think you will like them,
when you shall see them on a beautiful quarto page,
where a neat rivulet of text
shall meander through a meadow of margin.
'Fore Gad they will be the most elegant things of their kind!
\author RICHARD BRINSLEY ↑{SHERIDAN}, %
{\sl The School for Scandal\/} (1777) % Act I Sc 1
\bigskip
The influence of technical changes upon outputs
through variation in the general investment level $\beta$
is so small that actually it could have been neglected.
\author WASSILY W. ↑{LEONTIEF}, {\sl The Structure of American Economy, %
1919--1929\/} (1941)
\eject
\beginchapter Chapter 24. Summary of\\Vertical Mode
[Not yet written. Will open with summary of plain \TeX\ features for
section headings, some of which have not yet been mentioned.
Then will discuss things that \TeX\ does the same in all modes.
Then will discuss things that \TeX\ does only in vertical and
internal vertical mode.]
\endchapter
The first and most striking feature is the Verticality of composition,
as opposed to the Horizontality of all anterior structural modes.
\author COCKBURN ↑{MUIR}, {\sl Pagan or Christian?\/} (1860) % p61
\bigskip
Sometimes when I have finished a book I give a summary of the whole of it.
\author ROBERT WILLIAM ↑{DALE}, {\sl Nine Lectures on Preaching\/} (1878)
% viii.231
\eject
\beginchapter Chapter 25. Summary of\\Horizontal Mode
[Not yet written. Will discuss things that \TeX\ does only in horizontal
mode and restricted horizontal mode (only).]
\endchapter
\strut{\rm Otherwise.} %
You may reduce all\/ {\rm Verticals} into\/ {\rm Horizontals}.
\author JOSEPH ↑{MOXON}, {\sl A Tutor to Astronomie and Geographie\/} (1659)
\bigskip
\strut\tt! You can't use `\char`\\moveleft' in horizontal mode.
\author \TeX\ (1982)
\eject
\beginchapter Chapter 26. Summary of\\Math Mode
[Not yet written. Will discuss what \TeX\ does in math mode before
Appendix@G takes over; includes a syntactic description.]
\endchapter
The tale of Math is a complex one,
and it resists both a simple plot summary
and a concise statement of its meaning.
\author PATRICK K. ↑{FORD}, {\sl The Mabinogi\/} (1977) % p89
% from his introduction to "Math Son of Mathonwy"
\bigskip
Mathematics is known in the trade as {\rm difficult,} or {\rm penalty, copy}
because it is slower, more difficult, and more expensive to set in type
than any other kind of copy normally occurring in books and journals.
\author UNIVERSITY OF ↑{CHICAGO} PRESS, {\sl A Manual of Style\/} %
(1969) % 12th edition, page 295
\eject
\beginchapter Chapter 27. Recovery from\\Errors
[Not yet written. Will not list all the error messages, but will discuss
a few typical ones, especially those that \TeX\ itself promises to
deal with here---some of the help messages refer to this chapter.]
\endchapter
Who can understand his errors?
\author ↑{DAVID}, {\sl Psalm 19\thinspace:\thinspace12\/} (c.@1000 B.C.)
\bigskip
It is one thing, to shew a Man that he is in an Error,
and another, to put him in possession of Truth.
\author JOHN ↑{LOCKE}, {\sl An Essay Concerning Humane Understanding\/} (1690)
% bk 4 ch 7 sec 11
\eject
\beginchapter Appendix A. Answers to\\All the\\Exercises
The preface to this manual points out the wisdom of trying to figure out
each exercise before you look up the answer here. But these answers are intended
to be read, since they occasionally provide additional information that
you are best equipped to understand when you have just worked on a problem.
\immediate\closeout\ans % this makes the answers file ready
\ninepoint
\input answers
\endchapter
If you can't solve a problem,
you can always look up the answer.
But please, try first to solve it by yourself;
then you'll learn more and you'll learn faster.
\author DONALD E. ↑{KNUTH}, {\sl The \TeX book\/} (1983)
\bigskip
How answer you for your selues?
\author WILLIAM ↑{SHAKESPEARE}, {\sl Much Adoo About Nothing\/} (1598)
% Act IV, Scene 2, line 25
\eject
\beginchapter Appendix B. Basic\\Control\\Sequences
[Not yet written. Will include all the code from plain \TeX\ and a
few cursory comments, followed by tabular summaries of all the
operations, grouped alphabetically and grouped by function for ready
reference.]
\endchapter
The purpose of a programming system is to make a computer easy to use.
To do this, it furnishes languages and various facilities
that are in fact programs invoked and controlled by language features.
But these facilities are bought at a price:
the external description of a programming system is ten to twenty times
as large as the external description of the computer system itself.
The user finds it far easier to specify any particular function,
but there are far more to choose from,
and far more options and formats to remember.
\author FREDERICK P. ↑{BROOKS}, JR., {\sl The Mythical Man Month\/} (1975)
% p 43
\bigskip
When someone says, ``I want a programming language
in which I need only say what I wish done,''
give him a lollipop.
\author ALAN ↑{PERLIS}, {\sl Epigrams on Programming\/} (1982)
% SIGPLAN Notices 17,9 (September 82), 7--13.
% There are many more, like "Editing is a rewording activity."
\eject
\beginchapter Appendix C. Character\\Codes
[Not yet written. Will be a short discussion of \TeX's internal
character code, with emphasis on {\sl short}. This book is already
too long $\dots$@.]
\endchapter
Code sets obtained by modifying the standard as shown above
or by other replacements are nonstandard.
\author ASA SUBCOMMITTEE X3.2, {\sl American Standard\break %
Code for Information Interchange\/↑(ascii)} (1963)
% in {\sl Communications of the ACM\/}
\bigskip
Both the Stanford and DEC uses of the ASCII control characters
are in violation of the USA Standard Code,
but no Federal Marshal is likely to come running out
and arrest people who type control-T to their computers.
\author BRIAN ↑{REID}, {\sl SCRIBE Introductory User's Manual\/} (1978) % p82
\eject
\beginchapter Appendix D. Dirty Tricks
[Not yet written. Will discuss a dozen miscellaneous problems and
solutions, including approaches to verbatim mode.]
\medskip
Q. In what order are the control sequences |\a|, |\b|, and |\c|
expanded when \TeX\ reads the following text?
\begintt
\expandafter\expandafter\expandafter\a\expandafter\b\c
\endtt
A. First |\c| is expanded, then |\b|, then |\a|. \ (The next level
of reverse expansion occurs with |\x\x\x\x\x\x\x\a\x\x\x\b\x\c\d|, if you
|\let\x=\expandafter|.)
\endchapter
↑{Wolfe}, who had moved around the desk and into his chair,
↑(Hombert, Humbert) put up a palm at him: ``Please, Mr.@Hombert.
I think it is always advisable to take a short-cut when it is feasible.''
\author REX ↑{STOUT}, {\sl The Rubber Band\/} (1936)
\bigskip
``My dear ↑{Watson}, try a little analysis yourself,''
said he, ↑(Holmes) with a touch of impatience.
``You know my methods. Apply them,
and it will be instructive to compare results.''
\author CONAN ↑{DOYLE}, {\sl The Sign of the Four\/} (1890)
\eject
\beginchapter Appendix E. Example Formats
[Not yet written. Will include a set of macros for business correspondence,
another for music programs, and the macros used to produce {\sl The \TeX book}.]
\endchapter
It is much easier to use macros than to define them.
$\ldots$
The use of macro libraries, in fact, mirrors almost exactly
the use of subroutine libraries for programming languages.
There are the same levels of specialization,
from publicly shared subroutines
to special subroutines within a single program,
and there is the same need for a programmer
with particular skills to define the subroutines.
\author PETER ↑{BROWN}, {\sl Macro Processors\/} (1974) % p10
\bigskip
The ↑{epigraph} is among the most delightful of scholarly habits.
Donald ↑{Knuth}'s work on fundamental algorithms would be
just as important if he hadn't begun with a quotation
from Betty ↑{Crocker}, but not so enjoyable.
Part of the fun of an epigraph is turning a source to an unexpected use.
\author MARY-CLAIRE ↑{VAN LEUNEN}, {\sl A Handbook for Scholars\/} (1978)
% page 53. [But it was McCall's, not Betty Crocker]
\eject
\beginchapter Appendix F. Font Tables
% I put the old appendix F into file APPF.TEX
[Not yet written. Will show the layout of the Computer Modern fonts
used by the plain \TeX\ macro package, and will summarize the
names of the math symbols therein.]
\endchapter
Seek not for fresher founts afar,
Just drop your bucket where you are.
\author SAM WALTER ↑{FOSS}, {\sl Back Country Poems\/} (1892)
\bigskip
No one compositor will have all the signs and symbols available.
The number of special signs and symbols is almost limitless,
with new ones being introduced all the time.
\author UNIVERSITY OF ↑{CHICAGO} PRESS, {\sl A Manual of Style\/} (1969)
\eject
\beginchapter Appendix G. Generating Boxes\\from Formulas
\ninepoint
People who define new math fonts and/or macros sometimes need to know
exactly how \TeX\ manipulates the constituents of formulas. The purpose
of this appendix is to explain the precise positioning rules by which
\TeX\ converts a math list into a horizontal list. \ (It is a good idea
to review the introduction to ↑{math lists} in Chapter@17 before
reading further; ``double dangerous bends'' are implied throughout
this appendix.)
\TeX\ relies on lots of parameters when it typesets formulas, and you have
the option of changing any or all of them. But of course you will want to
know what each parameter means, before you change it. Therefore each rule
below is numbered, and a table appears at the end to show which rules
depend on which parameters.
The most important parameters appear in the ↑{symbol fonts} (family@2) and
the ↑{extension fonts} (family@3). \TeX\ will not typeset a formula unless
↑{*textfont}|2|, ↑{*scriptfont}|2|, and ↑{*scriptscriptfont}|2| each contain
at least 22@↑{*texinfo} parameters. For brevity we shall call these parameters
$\sigma↓1$ to $\sigma↓{22}$, where the parameter is taken from
|\textfont2| if the current style is display or text ($D$ or $D'$ or $T$
or $T'$), from |\scriptfont2| if the current style is $S$ or $S'$, and from
|\scriptscriptfont2| otherwise. Similarly, the three fonts in family@3
must each have at least 13@|\texinfo| parameters, and we will denote
them by $\xi↓1$ to $\xi↓{13}$. The notation $\xi↓9$, for example, stands for
the ninth parameter of\/ |\scriptfont3|, if \TeX\ is typesetting something
in |\scriptstyle|.
A math list is a sequence of items of the various kinds listed in Chapter@17,
and \TeX\ typesets a formula by converting a math list to a horizontal
list. When such typesetting begins, \TeX\ has two other pieces of
information in addition to the math list itself. \ (a)@The starting style
tells what style should be used for the math list, unless another style
is specified by a style item. For example, the starting style for a
displayed formula is $D$, but for an equation in the text or an equation
number it is $T$; and for a subformula it can be any one of the eight
↑{styles} defined in Chapter@17. We shall use $C$ to stand for the current
style, and we shall say that the math list is being typeset in style@$C$.
\ (b)@The typesetting is done either with or without penalties. Formulas
in the text of a paragraph are converted to horizontal lists in which
additional penalty items are inserted after binary operations and relations,
in order to aid in line breaking. Such penalties are not inserted in
other cases, because they would serve no useful function.
The eight styles are considered to be $D>D'>T>T'>S>S'>\it SS>SS'$, in
decreasing order. Thus, $C\le S$ means that the current style is $S$,
$S'$, $\it SS$, or@$\it SS'$. Style $C'$ means the current style with a
prime added if one isn't there; for example, we have $C'=T'$ if and only
if $C=T$ or $C=T'$. Style@$C\mathord\uparrow$ is the superscript style
for $C$; this means style@$S$ if $C$@is $D$ or@$T$, style@$S'$ if $C$@is
$D'$ or $T'$, style@$\it SS$ if $C$@is $S$ or $\it SS$, and style@$\it
SS'$ if $C$@is $S'$ or $\it SS'$. Finally, style@$C\mathord\downarrow$ is
the subscript style, which is@$(C\mathord\uparrow)'$.
Chapter 17 stated that the most important components of math lists are
called atoms, and that each atom has three fields called its nucleus,
subscript, and superscript. We frequently need to execute a subroutine
called ``Set box@$x$ to the so-and-so field in style such-and-such.'' This
means (a)@if the specified field is empty, $x$@is set equal to a null box;
(b)@if the field contains a symbol, $x$@is set to an hbox containing that
symbol in the appropriate size, and the ↑{italic correction} for the
character is included in the width of the box; (c)@if the field contains a
math list or horizontal list, $x$@is set to an hbox containing the result
of typesetting that list with the specified starting style. In case@(c),
the glue is set with no stretching or shrinking, and an additional level
of hboxing is omitted if it turns out to be redundant.
Another subroutine sets box $x$ to a specified variable ↑{delimiter},
having a specified minimum height plus depth. This means that a search is
conducted as follows: The delimiter is defined by two symbols, a ``small
character''@$a$ in family@$f$ and a ``large character''@$b$ in family@$g$.
The search looks first at character $a$ in scriptscriptfont@$f$, if $C\le
\it SS$; then it looks at $a$ in scriptfont@$f$, if $C\le S$; then it looks at
$a$ in textfont@$f$. If nothing suitable is found from $a$ and@$f$, the
larger alternative $b$ and $g$ is examined in the same way. Either
$(a,f)$ or $(b,g)$ may be $(0,0)$, which means that the corresponding part
of the search is to be bypassed. When looking at a character in a
font, the search stops immediately if that character has sufficient height
plus depth, or if the character is ↑{extensible}; furthermore, if the
character does not stop the search, but if it has a ↑{successor} in the
font, the successor is looked at next. \ (See the \MF\ manual or the
system documentation of |TFM| files for further information about
successors and extensible characters.) \ If the search runs all the way to
completion without finding a suitable character, the one with greatest
height plus depth is chosen. If no characters at all were found (either
because $a=f=b=g=0$ or because the characters did not exist in the fonts),
$x$@is set to an empty box whose width is ↑{*nulldelimiterspace}. If an
extensible character was found, $x$@is set to a vbox containing enough
pieces to build up a character of sufficient size; the height of this vbox
is the height of the topmost piece. ↑(built-up characters) Otherwise
$x$@is set to an hbox containing the character that was found; the italic
correction is included in the width of this box.
There's also a subroutine that ``reboxes'' a given box to a given@width.
If the box doesn't already have the desired width, \TeX\ unpackages it
(unless it was a vbox), then adds a kern for an italic correction if one was
implied, and inserts ↑{*hss} glue at both left and right; the resulting
horizontal list is packaged into an hbox. This process is used, for
example, to give a common width to the numerator and denominator of a
fraction; it centers whichever is smaller, unless infinite glue is present
in addition to the newly added |\hss|.
If $x$ is a box, we shall use the abbreviations $h(x)$, $d(x)$, and $w(x)$ for
its height, depth, and width, respectively.
Here now are the rules for typesetting a given math list in starting
style@$C$. The process applies from left to right, translating each
item in turn, depending on its type. Two passes are made over the list;
most of the work is done by the first pass, which compiles individual
translations of the math items; we shall consider this part of the
task first.
% \rule <label>. starts a numbered rule: the label is everything up to the
% first period (e.g. `\rule 13a.'); it is set in boldface, followed by a
% period, via \textindent after a \smallskip, and spaces before the rule's
% text are ignored.
\def\rule#1.{\smallskip\textindent{\bf#1.}\ignorespaces}
\rule 1. If the current item is a rule or discretionary or penalty or
``whatsit'' or boundary item, simply leave it unchanged and move to the
next item.
\rule 2. If the current item is glue or a kern, translate it as follows:
If it is glue from ↑{*nonscript}, check if the immediately following item
is glue or a kern; and if so, remove that item if $C\le S$. Otherwise, if the
current item is from ↑{*mskip} or ↑{*mkern}, convert from |mu| to absolute
units by multiplying each finite dimension by ${1\over18}\sigma↓6$. Then
move on to the next item.
\rule 3. If the current item is a style change, set $C$ to the specified
style. Delete the current item from the list and move on to the next.
\rule 4. If the current item is a four-way choice, it contains four math
lists for the four main styles. Replace it by the math list that corresponds
to the current style $C$, then move to the first unprocessed item.
\rule 5. If the current item is a Bin atom, and if this was the first atom
in the list, or if the most recent previous atom was Bin, Op, Rel, Open,
or Punct, change the current Bin to Ord and continue with Rule@14.
Otherwise continue with Rule@17.
\rule 6. If the current item is a Rel or Close or Punct atom, and if the most
recent previous atom was Bin, change that previous Bin to Ord. Continue
with Rule@17.
\rule 7. If the current item is an Open or Inner atom, go directly to Rule@17.
\rule 8. If the current item is a Vcent atom (from ↑{*vcenter}), let its
nucleus be a vbox of height@$h$ and depth@$d$. Change the height to
${1\over2}(h+d)+a$ and the depth to ${1\over2}(h+d)-a$, where $a$@is the
current axis height, $\sigma↓{22}$. Change this atom to type Ord and continue
with Rule@17.
\rule 9. If the current item is an Over atom (from ↑{*overline}), set
box@$x$ to the nucleus in style $C'$. Then replace the nucleus by a vbox
containing kern@$\theta$, hrule of@height@$\theta$, kern@$3\theta$,
and box@$x$, from top to bottom, where $\theta=\xi↓8$ is the ↑{default
rule thickness}. \ (This puts a rule over the nucleus, with $3\theta$ clearance,
and with $\theta$@units of extra white space assumed to be present above
the rule.) \ Continue with Rule@16.
\rule 10. If the current item is an Under atom (from ↑{*underline}), set
box@$x$ to the nucleus in style $C$. Then replace the nucleus by a vtop
made from box@$x$, kern@$3\theta$, and hrule of@height@$\theta$,
where $\theta=\xi↓8$ is the default rule thickness; and add $\theta$ to the
depth of the box. \ (This puts a rule under the nucleus, with $3\theta$
clearance, and with $\theta$@units of extra white space assumed to be
present below the rule.) \ Continue with Rule@16.
\rule 11. If the current item is a Rad atom (from ↑{*radical}, e.g.,
↑{:sqrt}), set box@$x$ to the nucleus in style $C'$. Let $\theta=\xi↓8$;
and let $\varphi=\sigma↓5$ if $C>T$, otherwise $\varphi=\theta$.
Set $\psi=\theta+{1\over4}\vert\varphi\vert$; this is the minimum
clearance that will be allowed between box@$x$ and the rule that will go
above it. Set box@$y$ to a variable delimiter for this radical atom, having
height plus depth $h(x)+d(x)+\psi+\theta$ or more. Then set $\theta\leftarrow
h(y)$; this is the thickness of the rule to be used in the radical
construction. \ (Note that the font designer specifies the thickness of
the rule by making it the height of the radical character; the baseline
of the character should be precisely at the bottom of the rule.) \
If $d(y)>h(x)+d(x)+\psi$, increase $\psi$ by half of the excess; i.e.,
set $\psi\leftarrow{1\over2}\bigl(\psi+d(y)-h(x)-d(x)\bigr)$. Construct
a vbox consisting of kern@$\theta$, hrule of@height@$\theta$, kern@$\psi$,
and box@$x$, from top to bottom. The nucleus of the radical atom is now
replaced by box@$y$ raised by $h(x)+\psi$, followed by the new vbox.
Continue with Rule@16.
\rule 12. If the current item is an Acc atom (from ↑{*mathaccent}),
just go to Rule@16 if the accent character doesn't exist in the
current size. Otherwise set box@$x$ to the nucleus in style $C'$.
If the accent character has a successor in its font whose width is
$\le w(x)$, change it to the successor and repeat this sentence.
Now set $\psi\leftarrow h(x)-\chi$, where $\chi$ is |\texinfo| parameter@5
(the ↑{x-height}) in the accent font. If $\psi<0$, reset $\psi\leftarrow0$;
$\psi$@is the amount by which the accent will be raised. Put the accent
into a new box@$y$, including the italic correction. Set $v\leftarrow w(y)$,
then set $w(y)\leftarrow 0$. Replace the nucleus of the Acc atom
by a vbox consisting of: box@$y$ moved right ${1\over2}\bigl(w(x)-v\bigr)$,
kern $\psi-h(x)$, and box@$x$. \ (Thus, the accent character has been
centered over the nucleus, and the resulting box has the width of the
original nucleus.) \ Continue with Rule@16.
\rule 13. If the current item is an Op atom, mark this atom as having
limits if it has been marked with ↑{*limits}, or if it has been marked
with ↑{*displaylimits} and $C>T$. If the nucleus is not a symbol, set
$\delta\leftarrow0$ and go to Rule@13a. Otherwise if $C>T$ and if the
nucleus symbol has a successor in its font, move to the successor. \ (This
is where operators like $\sum$ and $\int$ change to a larger size in
display styles.) \ Put the symbol into a new box@$x$, in the current size,
and set $\delta$ to the italic correction for the character; include $\delta$
in the width of box@$x$ if and only if limits are to be set or
there is no subscript. Shift box@$x$ down by ${1\over2}\bigl(h(x)+d(x)\bigr)
-a$, where $a=\sigma↓{22}$, so that the operator character is centered
vertically on the axis; this shifted box becomes the nucleus of the Op atom.
\rule 13a. If limits are not to be typeset for this Op atom, go to
Rule@17; otherwise the limits are attached as follows:
Set box@$x$ to the superscript field in style $C\mathord\uparrow$; set box@$y$
to the nucleus field in style@$C$; and set box@$z$ to the subscript field
in style $C\mathord\downarrow$. Rebox all three of these boxes to width
$\max\bigl(w(x),w(y),w(z)\bigr)$. If the superscript field was not empty,
attach box@$x$ above box@$y$, separated by a kern of size
$\max\bigl(\xi↓9,\xi↓{11}-d(x)\bigr)$, and shift box@$x$ right by
${1\over2}\delta$; also put a kern of size@$\xi↓{13}$ above box@$x$.
If the subscript field was not empty,
attach box@$z$ below box@$y$, separated by a kern of size
$\max\bigl(\xi↓{10},\xi↓{12}-h(z)\bigr)$, and shift box@$z$ left by
${1\over2}\delta$; also put a kern of size@$\xi↓{13}$ below box@$z$.
The resulting vbox becomes the nucleus of the current Op atom; move to
the next item.
\rule 14. If the current item is an Ord atom, go to Rule@17 unless
all of the following are true: The nucleus is a symbol; the subscript
and superscript are both empty; the very next item in the math list is an
atom of type Ord, Op, Bin, Rel, Open, Close, or Punct; and the nucleus of the
next item is a symbol whose family is the same as the family in the present
Ord atom. In such cases the present symbol is marked as a text symbol.
If the font information shows a ligature between this symbol and the
following one, using the specified family and the current size, then delete
the present atom, insert the ligature character into the symbol of the
following item, and move to that item. Otherwise if the font information
shows a kern between the current symbol and the next, insert a kern item
after the current Ord atom and move to the next item after that.
Otherwise (i.e., if no ligature or kern is specified between the present
text symbol and the following character), go to Rule@17.
\rule 15. If the current item is a generalized fraction (and it had better
be, because that's the only possibility left if Rules 1--14 don't apply),
let $\theta$ be the thickness of the bar line and let $(\lambda,\rho)$ be
the left and right delimiters. \ (If this fraction was generated by
↑{*over} or ↑{*overwithdelims}, then $\theta=\xi↓8$; if it was generated by
↑{*atop} or ↑{*atopwithdelims}, $\theta=0$; otherwise it was generated by
↑{*above} or ↑{*abovewithdelims}, and a specific value of@$\theta$ was
given at that time. The values of $\lambda$ and $\rho$ are null unless
the fraction is ``with delims.'')
\rule 15a. Put the numerator into box@$x$, using style $T$ or $T'$ if
$C$@is $D$ or $D'$, otherwise using style $C\mathord\uparrow$.
Put the denominator into box@$z$, using style $T'$ if
$C>T$, otherwise using $C\mathord\downarrow$.
If $w(x)<w(z)$, rebox $x$ to width@$w(z)$;
if $w(z)<w(x)$, rebox $z$ to width@$w(x)$.
\rule 15b. If $C>T$, set $u\leftarrow\sigma↓8$ and $v\leftarrow\sigma↓{11}$.
Otherwise set $u\leftarrow\sigma↓9$ or $\sigma↓{10}$, according as $\theta\ne0$
or $\theta=0$, and set $v\leftarrow\sigma↓{12}$. \ (The fraction will be
typeset with its numerator shifted up by an amount@$u$ with respect to
the current baseline, and with the denominator shifted down by@$v$,
unless the boxes are unusually large.)
\rule 15c. If $\theta=0$ (|\atop|), the numerator and denominator are
combined as follows: Set $\varphi\leftarrow7\xi↓8$ or
$3\xi↓8$, according as $C>T$ or $C\le T$; $\varphi$ is the minimum
clearance that will be tolerated between numerator and denominator.
Let $\psi=\bigl(u-d(x)\bigr)-\bigl(h(z)-v\bigr)$ be the actual clearance
that would be obtained with the current values of $u$ and@$v$; if
$\psi<\varphi$, add ${1\over2}(\varphi-\psi)$ to both $u$ and@$v$.
Then construct a vbox of height $h(x)+u$ and depth $d(z)+v$, consisting
of box@$x$ followed by an appropriate kern followed by box@$z$.
\rule 15d. If $\theta\ne0$ (|\over|), the numerator and denominator are
combined as follows: Set $\varphi\leftarrow3\theta$ or $\theta$, according
as $C>T$ or $C\le T$; $\varphi$ is the minimum clearance that will be
tolerated between numerator or denominator and the bar line. Let
$a=\sigma↓{22}$ be the current axis height; the middle of the bar line
will be placed at this height. If
$\bigl(u-d(x)\bigr)-(a+{1\over2}\theta)<\varphi$, increase@$u$ by the
difference between these quantities; and if $(a+{1\over2}\theta)-
\bigl(h(z)-v\bigr)<\varphi$, increase@$v$ by the difference. Finally
construct a vbox of height $h(x)+u$ and depth $d(z)+v$, consisting of
box@$x$ followed by a kern followed by an hrule of height@$\theta$
followed by another kern followed by box@$z$, where the kerns are figured
so that the bottom of the hrule occurs at $a-{1\over2}\theta$ above the
baseline.
\rule 15e. Enclose the vbox that was constructed in Rule 15c or 15d by
delimiters whose height plus depth is at least $\sigma↓{20}$, if $C>T$, and at
least $\sigma↓{21}$ otherwise. Shift the delimiters up or down so that they are
vertically centered with respect to the axis. Replace the generalized
fraction by an Inner atom whose nucleus is the resulting sequence of three boxes
(left delimiter, vbox, right delimiter).
\bigbreak\noindent
Rules 1--15 account for the preliminary processing of math list items;
but we still haven't specified how subscripts and superscripts are to be
typeset. Therefore some of those rules lead to the following post-process:
\rule 16. Change the current item to an Ord atom, and continue with Rule@17.
\rule 17. If the nucleus of the current item is a math list, replace it by
a box obtained by typesetting that list in the current style. Then if the
nucleus is not simply a symbol, go on to Rule@18. Otherwise we are in the
common case that a math symbol is to be translated to its horizontal-list
equivalent: Convert the symbol to a character box for the specified family
in the current size. If the symbol was not marked by Rule@14 above as a
text symbol, or if\/ |\texinfo| parameter number@2 of its font is zero, set
$\delta$ to the italic correction; otherwise set $\delta$ to zero. If
$\delta$ is nonzero and if the subscript field of the current atom is
empty, insert a kern of width@$\delta$ after the character box, and set
$\delta$ to zero. Continue with Rule@18.
\rule 18. (The remaining task for the current atom is to attach a possible
subscript and superscript.) \ If both subscript and superscript fields
are empty, move to the next item. Otherwise continue with the following
subrules:
\rule 18a. If the translation of the nucleus is a character box, possibly
followed by a kern, set $u$ and@$v$ equal to zero; otherwise set
$u\leftarrow h-q$ and $v\leftarrow d+r$, where $h$ and@$d$ are the height
and depth of the translated nucleus, and where $q$ and@$r$ are the values
of $\sigma↓{18}$ and $\sigma↓{19}$ in the font corresponding to styles
$C\mathord\uparrow$ and@$C\mathord\downarrow$. \ (The quantities $u$
and@$v$ represent minimum amounts by which the superscript and subscript
will be shifted up and down; these preliminary values of $u$ and@$v$ may
be increased later.)
\rule 18b. If the superscript field is empty (so that there is a subscript
only), set box@$x$ to the subscript in style@$C\mathord\downarrow$, and add
↑{*scriptspace} to $w(x)$. Append this box to the translation of the
current item, shifting it down by $\max\bigl(v,\sigma↓{16},h(x)-{4\over5}
\vert\sigma↓5\vert\bigr)$, and move to the next item. \ (The idea is
to make sure that the subscript is shifted by at least $v$ and by at
least $\sigma↓{16}$; furthermore, the top of the subscript should not extend
above $4\over5$ of the current x-height.)
\rule 18c. Set box@$x$ to the superscript field in style@$C\mathord\uparrow$,
and add |\scriptspace| to@$w(x)$. Then set $u\leftarrow\max\bigl(u,p,
d(x)+{1\over4}\vert\sigma↓5\vert\bigr)$, where $p=\sigma↓{13}$ if $C=D$,
$p=\sigma↓{15}$ if $C=C'$, and $p=\sigma↓{14}$ otherwise; this gives a
tentative position for the superscript.
\rule 18d. If the subscript field is empty (so that there is a
superscript only), append box@$x$ to the translation of the current atom,
shifting it up by $u$, and move to the next item. Otherwise (i.e.,
both subscript and superscript are present), set box@$y$ to the
subscript in style@$C\mathord\downarrow$, add |\scriptspace| to@$w(y)$,
and set $v\leftarrow\max(v,\sigma↓{17})$.
\rule 18e. (The remaining task is to position a joint
subscript/superscript combination.) \ Let $\theta=\xi↓8$ be the default
rule thickness. If $\bigl(u-d(x)\bigr)-\bigl(h(y)-v\bigr)\ge4\theta$, go
to Rule@18f. \ (This means that the white space between subscript and
superscript is at least $4\theta$.) \ Otherwise reset $v$ so that
$\bigl(u-d(x)\bigr)-\bigl(h(y)-v\bigr)=4\theta$. Let $\psi={4\over5}
\vert\sigma↓5\vert-\bigl(u-d(x)\bigr)$. If $\psi>0$, increase $u$
by@$\psi$ and decrease $v$ by@$\psi$. \ (This means that the bottom of the
superscript will be at least as high above the baseline as $4\over5$ of
the x-height.)
\rule 18f. Finally, let $\delta$ be zero unless it was set to a nonzero
value by Rules 13 or 17. \ (This is the amount of horizontal displacement
between subscript and superscript.) \ Make a vbox of height $h(x)+u$ and
depth $d(y)+v$, consisting of box@$x$ shifted right by@$\delta$, followed
by an appropriate kern, followed by box@$y$. Append this vbox to the
translation of the current item and move to the next.
\bigbreak\noindent
After the entire math list has been processed by Rules 1--18, \TeX\ looks
at the last atom (if there was one), and changes its type from
Bin to Ord (if it was of type Bin). Then the following rule is performed:
\rule 19. If the math list begins and ends with boundary items, compute
the maximum height@$h$ and depth@$d$ of the boxes in the translation of
the math list that was made on the first pass, taking into account the
fact that some boxes may be raised or lowered. Let $a=\sigma↓{22}$ be the
axis height, and let $\delta=\max(h-a,d+a)$ be the amount by which the
formula extends away from the axis. Replace the boundary items by
delimiters whose height plus depth is at least $\max(\lfloor\delta/500\rfloor
f,2\delta-l)$, where $f$ is the ↑{*delimiterfactor}
and $l$ is the ↑{*delimitershortfall}. Shift the delimiters up or down so
that they are vertically centered with respect to the axis. Change the left
boundary item to an Open atom and the right boundary item to
a Close atom. \ (All of the calculations in this step are done with
$C$ equal to the starting style of the math list; style items in the
middle of the list do not affect the style of the right boundary item.)
\medbreak \rule 20. Rules 1--19 convert the math list into a sequence of
items in which the only remaining atoms are of types Ord, Op, Bin, Rel,
Open, Close, Punct, and Inner. After that conversion is complete, a second pass
is made through the entire list, replacing all of the atoms by the boxes
and kerns in their translations. Furthermore, additional inter-element
spacing is inserted just before each atom except the first, based on the
type of that atom and the preceding one. Inter-element spacing is defined
by the three parameters ↑{*thinmuskip}, ↑{*medmuskip}, and
↑{*thickmuskip}; the |mu| units are converted to absolute units as in
Rule@2 above. Chapter@18 has a chart that defines the inter-element
spacing, some of which is ↑{*nonscript}, i.e., it is inserted only in
styles $>S$. The list might also contain style items, which are removed
during the second pass; they are used to change the current style just as
in the first pass, so that both passes have the same value of@$C$ when
they work on any particular atom.
\rule 21. Besides the inter-element spacing, penalties are placed after
the translation of each atom of type Bin or Rel, if the math list was part
of a paragraph. The penalty after a Bin is ↑{*binoppenalty}, and the
penalty after a Rel is ↑{*relpenalty}. However, the penalty is not
inserted after the final item in the entire list, or if it has a numeric
value@$\ge10000$, or if the very next item in the list is already a
penalty item, or after a Rel atom that is immediately followed by another
Rel atom.
\rule 22. After all of the preceding actions have been performed, the math
list has been totally converted to a horizontal list. If the result is
being inserted into a larger horizontal list, in horizontal mode or
restricted horizontal mode, it is enclosed by ``↑{math-on}'' and
``↑{math-off}'' items that each record the current value of
↑{*mathsurround}. Or if this list is a displayed formula, it
is processed further as explained in Chapter@19.
\medbreak
% Redefine \\ as a 1pt kern; it is used after some `\_' in the external
% parameter names below (e.g. `x\_\\height').
% NOTE(review): \\ also appears in \beginchapter titles; the scope of this
% redefinition is not visible here -- confirm it does not leak.
\def\\{\kern1pt}
\noindent{\bf Summary of parameter usage.} \ Here is the promised index
that refers to everything affected by the mysterious parameters
in the symbol fonts. Careful study of the rules allows you to get the
best results by appropriately setting the parameters for new fonts
that you may wish to use in mathematical typesetting. Each font parameter
has an external name that is used in supporting software packages;
for example, $\sigma↓{14}$ is generally referred to as `sup2' and $\xi↓8$ as
`default\_\\rule\_thickness'. These external names are indicated in the table.
$$\vbox{
\halign to\the\hsize{$#\hfil$\tabskip10pt plus 10pt minus 3pt&
#\hfil&$\rm#\hfil$\quad&
$#\hfil$&#\hfil&$\rm#\hfil$\tabskip0pt\cr
\omit\span\omit\it Parameter&\omit\it Used in\hfil&
\omit\span\omit\it Parameter&\omit\it Used in\hfil\cr
\noalign{\medskip\hrule\smallskip}
\sigma↓2&space&17&
\sigma↓{17}&sub2&18d\cr
\sigma↓5&x\_\\height&11, 18b, 18c, 18e&
\sigma↓{18}&sup\_drop&18a\cr
\sigma↓6&quad&2, 20&
\sigma↓{19}&sub\_drop&18a\cr
\sigma↓8&num1&15b&
\sigma↓{20}&delim1&15e\cr
\sigma↓9&num2&15b&
\sigma↓{21}&delim2&15e\cr
\sigma↓{10}&num3&15b&
\sigma↓{22}&axis\_\\height&8, 13, 15d, 19\cr
\sigma↓{11}&denom1&15b&
\xi↓8&default\_\\rule\_thickness&9, 10, 11, 15, 15c, 18e\cr
\sigma↓{12}&denom2&15b&
\xi↓9&big\_op\_\\spacing1&13a\cr
\sigma↓{13}&sup1&18c&
\xi↓{10}&big\_op\_\\spacing2&13a\cr
\sigma↓{14}&sup2&18c&
\xi↓{11}&big\_op\_\\spacing3&13a\cr
\sigma↓{15}&sup3&18c&
\xi↓{12}&big\_op\_\\spacing4&13a\cr
\sigma↓{16}&sub1&18b&
\xi↓{13}&big\_op\_\\spacing5&13a\cr
\noalign{\smallskip\hrule\medskip}}}$$
Besides the symbol and extension fonts (families 2 and@3), the
rules above also refer to parameters in other families:
Rule@17 uses |\texinfo| parameter@2 (space) to determine whether to insert
an italic correction between adjacent letters, and Rule@12 uses parameter@5
(x\_\\height) to position an accent character. Several non-font parameters
also affect the typesetting of mathematics:
Dimension parameters
\hbox{|\delimitershortfall|} (Rule@19),
\hbox{|\nulldelimiterspace|} (in
the construction of variable delimiters for Rules 11, 15e,@19),
\hbox{|\mathsurround|} (Rule@22),
and
\hbox{|\scriptspace|} (Rules 18bcd);
integer parameters
\hbox{|\delimiterfactor|} (Rule@19),
\hbox{|\binoppenalty|} (Rule@21),
and
\hbox{|\relpenalty|} (Rule@21);
muglue parameters
\hbox{|\thinmuskip|},
\hbox{|\medmuskip|,} and
\hbox{|\thickmuskip|}
(Rule@20).
\endchapter
Woe to the author who always wants to teach!\/
% Mais malheur a` l'auteur qui veut toujours instruire!
The secret of being a bore is to tell everything.
% Le secret d'ennuyer est celui de tout dire.
\author ↑{VOLTAIRE}, {\sl De la Nature de l'Homme\/} (1737)
% 6th discours en vers sur l'homme
\bigskip
Very few Compositors are fond of Algebra,
and rather chuse to be employed upon plain work.
\author PHILIP ↑{LUCKOMBE}, {\sl The History and Art of Printing\/} (1770)
% page 472
\eject
\beginchapter Appendix H. Hyphenation
{\raggedright
[Not yet written. Will discuss Liang's general method, but will not
include the list of patterns. I may use some of the following words
in demonstrations: supercalifragilisticexpialidocious,
pneumonoultramicroscopicsilicovolcanoconiosis,
Llanfairpwllgwyngyllgogerychwyrndrobwllllantysiliogogogoch,
Constantinopolitanischerdudelsackspfeifenmachersgesellschafft,
Nihilistendynamittheaterkaestchenssprengungsattentaetsversuchungen,
Transvaaltruppentropentransporttrampelthiertreibertrauungsthraenentragoedie,
Mekkamuselmannenmassenmenchenmoerdermohrenmuttermarmormonumentenmachen;
the last four are from Mark Twain's {\sl Connecticut Yankee}, Chapter@23.]
\par}
\endchapter
If all problems of hyphenation have not been solved,
at least some progress has been made
since that night, when according to legend,
an RCA Marketing Manager received a phone call
from a disturbed customer. His 301 had just hyphenated ``God.''
\author PAUL E. ↑{JUSTUS}, {\sl There's More to Typesetting Than %
Setting Type\/} (1972)
% in {\sl IEEE Transactions on Professional Communication}, vol. PC-15, pp. 13--15
\bigskip
The committee skeptically re-
commended more study for a bill
to require warning labels on rec-
ords with subliminal messages re-
corded backward.
\author THE PENINSULA ↑{TIMES TRIBUNE} (April 28, 1982)
\eject
\beginchapter Appendix I. Index
%Things to do at last minute:
%look for `f |\' and `f ↑{*' and `f ↑{:' (put \/ in)
%look for Appendix (put \null in)
[In progress. I'll indicate the main source of information about
each item in some special way, so that the reader doesn't have to
wade through all pages where a topic is mentioned.
Appendix@B and Appendix@F serve in part as supplementary indexes.]
\endchapter
Important works such as histories, biographies,
scientific and technical text-books, etc., should contain indexes.
Indeed, such works are scarcely to be considered complete without indexes.
\smallskip
An index is almost invariably placed at the end of a volume
and is set in smaller type than the text-matter.
Its subjects should be thoroughly alphabetized.
\smallskip
The compiling of an index is interesting work, though
some authors are apt to find it tedious and delegate the work to others.
The proofreader who undertakes it will find that it is splendid mental exercise
and brings out his latent editorial capability.
\author ALBERT H. ↑{HIGHTON}, {\sl Practical Proofreading\/} (1926)
\bigskip
Important references are given in boldface.
Italicized numbers indicate fleeting references,
whereas numbers in parentheses refer to
mere implications or unwarranted extrapolations.
Asterisks are used to identify particularly distasteful passages.
\author PROF.@PETER ↑{SCHICKELE}, {\sl The Definitive Biography of %
P. D. Q. ↑{Bach}\/} (1976)
\eject
\beginchapter Appendix J. Joining the\\\TeX\ Community
[Will be a short note about the \TeX\ Users Group.]
\endchapter
[The printer] should refuse to employ wandering men,
foreigners who, after having committed some grievous error,
can easily disappear and return to their own country.
\author HIERONYMUS ↑{HORNSCHUCH}, $O\rho\mkern1mu\theta o\mkern1mu %
\tau\upsilon\pi o\gamma\rho\alpha\varphi %
\mathaccent'16\iota\alpha\varsigma$ (1608)
\bigskip
An author writing an article for publication in TUGboat
is encouraged to create it on a computer file and submit it on magnetic tape.
\author BARBARA ↑{BEETON}, {\sl {\tt\char`\\title} How to Prepare a File\/%
{\tt\char`\\cr}\phantom{\rm\enskip(1981)}\break %
For Publication in TUGboat\/{\tt\char`\\cr}} (1981) % TUGboat 2,1 p53
\eject
\end